// tracy/server/TracyWorker.cpp

#ifdef _MSC_VER
# include <winsock2.h>
#else
# include <sys/time.h>
#endif
#include <chrono>
#include <mutex>
#include <string.h>
#if ( defined _MSC_VER && _MSVC_LANG >= 201703L ) || __cplusplus >= 201703L
# if __has_include(<execution>)
# include <execution>
# else
# define MY_LIBCPP_SUCKS
# endif
#else
# define MY_LIBCPP_SUCKS
#endif
#ifdef MY_LIBCPP_SUCKS
# include "tracy_pdqsort.h"
#endif
#include "../common/TracyProtocol.hpp"
#include "../common/TracySystem.hpp"
#include "TracyFileRead.hpp"
#include "TracyFileWrite.hpp"
#include "TracyVersion.hpp"
#include "TracyWorker.hpp"
#include "tracy_flat_hash_map.hpp"
namespace tracy
{
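// The trace file begins with an 8-byte header: a 5-byte magic "tracy"
// (FileHeaderMagic bytes) followed by three version bytes, which
// FileVersion() packs into a single comparable integer.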
static constexpr int FileVersion( uint8_t h5, uint8_t h6, uint8_t h7 )
{
return ( h5 << 16 ) | ( h6 << 8 ) | h7;
}
static const uint8_t FileHeader[8] { 't', 'r', 'a', 'c', 'y', Version::Major, Version::Minor, Version::Patch };
enum { FileHeaderMagic = 5 };
static const int CurrentVersion = FileVersion( Version::Major, Version::Minor, Version::Patch );
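// Replays the lock timeline from position `pos` onwards, recomputing each
// event's derived state (locking thread, lock depth, bitmask of waiting
// threads) from the raw Wait/Obtain/Release sequence.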
static void UpdateLockCountLockable( LockMap& lockmap, size_t pos )
{
auto& timeline = lockmap.timeline;
uint8_t lockingThread;
uint8_t lockCount;
uint64_t waitList;
if( pos == 0 )
{
lockingThread = 0;
lockCount = 0;
waitList = 0;
}
else
{
const auto tl = timeline[pos-1];
lockingThread = tl->lockingThread;
lockCount = tl->lockCount;
waitList = tl->waitList;
}
const auto end = timeline.size();
while( pos != end )
{
const auto tl = timeline[pos];
const auto tbit = uint64_t( 1 ) << tl->thread;
switch( (LockEvent::Type)tl->type )
{
case LockEvent::Type::Wait:
waitList |= tbit;
break;
case LockEvent::Type::Obtain:
assert( lockCount < std::numeric_limits<uint8_t>::max() );
assert( ( waitList & tbit ) != 0 );
waitList &= ~tbit;
lockingThread = tl->thread;
lockCount++;
break;
case LockEvent::Type::Release:
assert( lockCount > 0 );
lockCount--;
break;
default:
break;
}
tl->lockingThread = lockingThread;
tl->waitList = waitList;
tl->lockCount = lockCount;
pos++;
}
}
static void UpdateLockCountSharedLockable( LockMap& lockmap, size_t pos )
{
auto& timeline = lockmap.timeline;
uint8_t lockingThread;
uint8_t lockCount;
uint64_t waitShared;
uint64_t waitList;
uint64_t sharedList;
if( pos == 0 )
{
lockingThread = 0;
lockCount = 0;
waitShared = 0;
waitList = 0;
sharedList = 0;
}
else
{
const auto tl = (LockEventShared*)timeline[pos-1];
lockingThread = tl->lockingThread;
lockCount = tl->lockCount;
waitShared = tl->waitShared;
waitList = tl->waitList;
sharedList = tl->sharedList;
}
const auto end = timeline.size();
// ObtainShared and ReleaseShared should assert on lockCount == 0, but
// due to the async retrieval of data from threads that is not possible.
while( pos != end )
{
const auto tl = (LockEventShared*)timeline[pos];
const auto tbit = uint64_t( 1 ) << tl->thread;
switch( (LockEvent::Type)tl->type )
{
case LockEvent::Type::Wait:
waitList |= tbit;
break;
case LockEvent::Type::WaitShared:
waitShared |= tbit;
break;
case LockEvent::Type::Obtain:
assert( lockCount < std::numeric_limits<uint8_t>::max() );
assert( ( waitList & tbit ) != 0 );
waitList &= ~tbit;
lockingThread = tl->thread;
lockCount++;
break;
case LockEvent::Type::Release:
assert( lockCount > 0 );
lockCount--;
break;
case LockEvent::Type::ObtainShared:
assert( ( waitShared & tbit ) != 0 );
assert( ( sharedList & tbit ) == 0 );
waitShared &= ~tbit;
sharedList |= tbit;
break;
case LockEvent::Type::ReleaseShared:
assert( ( sharedList & tbit ) != 0 );
sharedList &= ~tbit;
break;
default:
break;
}
tl->lockingThread = lockingThread;
tl->waitShared = waitShared;
tl->waitList = waitList;
tl->sharedList = sharedList;
tl->lockCount = lockCount;
pos++;
}
}
static inline void UpdateLockCount( LockMap& lockmap, size_t pos )
{
if( lockmap.type == LockType::Lockable )
{
UpdateLockCountLockable( lockmap, pos );
}
else
{
UpdateLockCountSharedLockable( lockmap, pos );
}
}
LoadProgress Worker::s_loadProgress;
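// Network capture constructor: connects to a running client at `addr` and
// spawns the worker thread that receives and dispatches profiling events.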
Worker::Worker( const char* addr )
: m_addr( addr )
, m_connected( false )
, m_hasData( false )
, m_shutdown( false )
, m_terminate( false )
, m_stream( LZ4_createStreamDecode() )
, m_buffer( new char[TargetFrameSize*3 + 1] )
, m_bufferOffset( 0 )
, m_pendingStrings( 0 )
, m_pendingThreads( 0 )
, m_pendingSourceLocation( 0 )
, m_pendingCallstackFrames( 0 )
, m_traceVersion( CurrentVersion )
{
m_data.sourceLocationExpand.push_back( 0 );
m_data.threadExpand.push_back( 0 );
m_data.callstackPayload.push_back( nullptr );
memset( m_gpuCtxMap, 0, sizeof( m_gpuCtxMap ) );
#ifndef TRACY_NO_STATISTICS
m_data.sourceLocationZonesReady = true;
#endif
m_thread = std::thread( [this] { Exec(); } );
SetThreadName( m_thread, "Tracy Worker" );
}
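// Trace file constructor: validates the header and version, then
// deserializes the trace; eventMask selects which event categories (locks,
// messages, plots, memory) are loaded rather than skipped.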
Worker::Worker( FileRead& f, EventType::Type eventMask )
: m_connected( false )
, m_hasData( true )
, m_shutdown( false )
, m_terminate( false )
, m_stream( nullptr )
, m_buffer( nullptr )
{
m_data.threadExpand.push_back( 0 );
m_data.callstackPayload.push_back( nullptr );
int fileVer = 0;
uint8_t hdr[8];
f.Read( hdr, sizeof( hdr ) );
if( memcmp( FileHeader, hdr, FileHeaderMagic ) == 0 )
{
fileVer = FileVersion( hdr[FileHeaderMagic], hdr[FileHeaderMagic+1], hdr[FileHeaderMagic+2] );
if( fileVer > CurrentVersion )
{
throw UnsupportedVersion( fileVer );
}
f.Read( m_delay );
}
else
{
static_assert( sizeof( m_delay ) == sizeof( hdr ), "Size mismatch" );
memcpy( &m_delay, hdr, sizeof( m_delay ) );
}
m_traceVersion = fileVer;
if( fileVer <= FileVersion( 0, 3, 1 ) )
{
s_loadProgress.total.store( 7, std::memory_order_relaxed );
}
else
{
s_loadProgress.total.store( 8, std::memory_order_relaxed );
}
s_loadProgress.subTotal.store( 0, std::memory_order_relaxed );
s_loadProgress.progress.store( LoadProgress::Initialization, std::memory_order_relaxed );
f.Read( m_resolution );
f.Read( m_timerMul );
f.Read( m_data.lastTime );
if( fileVer >= FileVersion( 0, 3, 200 ) )
{
f.Read( m_data.frameOffset );
}
uint64_t sz;
{
f.Read( sz );
assert( sz < 1024 );
char tmp[1024];
f.Read( tmp, sz );
m_captureName = std::string( tmp, tmp+sz );
}
if( fileVer >= FileVersion( 0, 3, 202 ) )
{
f.Read( sz );
m_data.frames.Data().reserve_and_use( sz );
for( uint64_t i=0; i<sz; i++ )
{
auto ptr = m_slab.AllocInit<FrameData>();
f.Read( &ptr->name, sizeof( ptr->name ) );
f.Read( &ptr->continuous, sizeof( ptr->continuous ) );
uint64_t fsz;
f.Read( &fsz, sizeof( fsz ) );
ptr->frames.reserve_and_use( fsz );
if( ptr->continuous )
{
for( uint64_t i=0; i<fsz; i++ )
{
f.Read( &ptr->frames[i].start, sizeof( int64_t ) );
ptr->frames[i].end = -1;
}
}
else
{
f.Read( ptr->frames.data(), sizeof( FrameEvent ) * fsz );
}
m_data.frames.Data()[i] = ptr;
}
m_data.framesBase = m_data.frames.Data()[0];
assert( m_data.framesBase->name == 0 );
}
else
{
auto ptr = m_slab.AllocInit<FrameData>();
ptr->name = 0;
ptr->continuous = 1;
f.Read( sz );
ptr->frames.reserve_and_use( sz );
for( uint64_t i=0; i<sz; i++ )
{
f.Read( &ptr->frames[i].start, sizeof( int64_t ) );
ptr->frames[i].end = -1;
}
m_data.frames.Data().push_back( ptr );
m_data.framesBase = ptr;
}
flat_hash_map<uint64_t, const char*, nohash<uint64_t>> pointerMap;
f.Read( sz );
m_data.stringData.reserve( sz );
for( uint64_t i=0; i<sz; i++ )
{
uint64_t ptr, ssz;
f.Read2( ptr, ssz );
auto dst = m_slab.Alloc<char>( ssz+1 );
f.Read( dst, ssz );
dst[ssz] = '\0';
m_data.stringData.push_back( dst );
pointerMap.emplace( ptr, dst );
}
f.Read( sz );
for( uint64_t i=0; i<sz; i++ )
{
uint64_t id, ptr;
f.Read2( id, ptr );
m_data.strings.emplace( id, pointerMap.find( ptr )->second );
}
f.Read( sz );
for( uint64_t i=0; i<sz; i++ )
{
uint64_t id, ptr;
f.Read2( id, ptr );
m_data.threadNames.emplace( id, pointerMap.find( ptr )->second );
}
if( fileVer >= FileVersion( 0, 3, 201 ) )
{
f.Read( sz );
m_data.threadExpand.reserve( sz );
}
f.Read( sz );
for( uint64_t i=0; i<sz; i++ )
{
uint64_t ptr;
f.Read( ptr );
SourceLocation srcloc;
f.Read( srcloc );
m_data.sourceLocation.emplace( ptr, srcloc );
}
f.Read( sz );
m_data.sourceLocationExpand.reserve_and_use( sz );
f.Read( m_data.sourceLocationExpand.data(), sizeof( uint64_t ) * sz );
const auto sle = sz;
f.Read( sz );
m_data.sourceLocationPayload.reserve( sz );
for( uint64_t i=0; i<sz; i++ )
{
auto srcloc = m_slab.Alloc<SourceLocation>();
f.Read( srcloc, sizeof( *srcloc ) );
m_data.sourceLocationPayload.push_back_no_space_check( srcloc );
m_data.sourceLocationPayloadMap.emplace( srcloc, uint32_t( i ) );
}
#ifndef TRACY_NO_STATISTICS
m_data.sourceLocationZonesReady = false;
m_data.sourceLocationZones.reserve( sle + sz );
if( fileVer >= FileVersion( 0, 3, 201 ) )
{
f.Read( sz );
for( uint64_t i=0; i<sz; i++ )
{
int32_t id;
uint64_t cnt;
f.Read( id );
f.Read( cnt );
auto status = m_data.sourceLocationZones.emplace( id, SourceLocationZones() );
assert( status.second );
status.first->second.zones.reserve( cnt );
}
}
else
{
for( uint64_t i=1; i<sle; i++ )
{
m_data.sourceLocationZones.emplace( int32_t( i ), SourceLocationZones() );
}
for( uint64_t i=0; i<sz; i++ )
{
m_data.sourceLocationZones.emplace( -int32_t( i + 1 ), SourceLocationZones() );
}
}
#else
if( fileVer >= FileVersion( 0, 3, 201 ) )
{
f.Read( sz );
for( uint64_t i=0; i<sz; i++ )
{
int32_t id;
f.Read( id );
f.Skip( sizeof( uint64_t ) );
m_data.sourceLocationZonesCnt.emplace( id, 0 );
}
}
else
{
for( uint64_t i=1; i<sle; i++ )
{
m_data.sourceLocationZonesCnt.emplace( int32_t( i ), 0 );
}
for( uint64_t i=0; i<sz; i++ )
{
m_data.sourceLocationZonesCnt.emplace( -int32_t( i + 1 ), 0 );
}
}
#endif
s_loadProgress.progress.store( LoadProgress::Locks, std::memory_order_relaxed );
f.Read( sz );
if( eventMask & EventType::Locks )
{
s_loadProgress.subTotal.store( sz, std::memory_order_relaxed );
for( uint64_t i=0; i<sz; i++ )
{
s_loadProgress.subProgress.store( i, std::memory_order_relaxed );
LockMap lockmap;
uint32_t id;
uint64_t tsz;
f.Read( id );
f.Read( lockmap.srcloc );
f.Read( lockmap.type );
f.Read( lockmap.valid );
f.Read( tsz );
for( uint64_t i=0; i<tsz; i++ )
{
uint64_t t;
f.Read( t );
lockmap.threadMap.emplace( t, lockmap.threadList.size() );
lockmap.threadList.emplace_back( t );
}
f.Read( tsz );
lockmap.timeline.reserve_and_use( tsz );
auto ptr = lockmap.timeline.data();
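// Only the persistent event fields (time, srcloc, thread, type) are stored
// on disk; the derived wait/count state is rebuilt afterwards by
// UpdateLockCount().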
if( fileVer >= FileVersion( 0, 3, 0 ) )
{
if( lockmap.type == LockType::Lockable )
{
for( uint64_t i=0; i<tsz; i++ )
{
auto lev = m_slab.Alloc<LockEvent>();
f.Read( lev, sizeof( LockEvent::time ) + sizeof( LockEvent::srcloc ) + sizeof( LockEvent::thread ) + sizeof( LockEvent::type ) );
*ptr++ = lev;
}
}
else
{
for( uint64_t i=0; i<tsz; i++ )
{
auto lev = m_slab.Alloc<LockEventShared>();
f.Read( lev, sizeof( LockEventShared::time ) + sizeof( LockEventShared::srcloc ) + sizeof( LockEventShared::thread ) + sizeof( LockEventShared::type ) );
*ptr++ = lev;
}
}
}
else
{
if( lockmap.type == LockType::Lockable )
{
for( uint64_t i=0; i<tsz; i++ )
{
auto lev = m_slab.Alloc<LockEvent>();
f.Read( lev, sizeof( LockEvent::time ) + sizeof( LockEvent::srcloc ) + sizeof( LockEvent::thread ) );
f.Skip( sizeof( uint8_t ) );
f.Read( lev->type );
f.Skip( sizeof( uint8_t ) + sizeof( uint64_t ) );
*ptr++ = lev;
}
}
else
{
for( uint64_t i=0; i<tsz; i++ )
{
auto lev = m_slab.Alloc<LockEventShared>();
f.Read( lev, sizeof( LockEventShared::time ) + sizeof( LockEventShared::srcloc ) + sizeof( LockEventShared::thread ) );
f.Skip( sizeof( uint8_t ) );
f.Read( lev->type );
f.Skip( sizeof( uint8_t ) + sizeof( uint64_t ) * 3 );
*ptr++ = lev;
}
}
}
UpdateLockCount( lockmap, 0 );
m_data.lockMap.emplace( id, std::move( lockmap ) );
}
}
else
{
for( uint64_t i=0; i<sz; i++ )
{
LockType type;
uint64_t tsz;
f.Skip( sizeof( uint32_t ) + sizeof( LockMap::srcloc ) );
f.Read( type );
f.Skip( sizeof( LockMap::valid ) );
f.Read( tsz );
f.Skip( tsz * sizeof( uint64_t ) );
f.Read( tsz );
if( fileVer >= FileVersion( 0, 3, 0 ) )
{
f.Skip( tsz * ( sizeof( LockEvent::time ) + sizeof( LockEvent::type ) + sizeof( LockEvent::srcloc ) + sizeof( LockEvent::thread ) ) );
}
else
{
f.Skip( tsz * ( type == LockType::Lockable ? sizeof( LockEvent ) : sizeof( LockEventShared ) ) );
}
}
}
s_loadProgress.subTotal.store( 0, std::memory_order_relaxed );
s_loadProgress.progress.store( LoadProgress::Messages, std::memory_order_relaxed );
flat_hash_map<uint64_t, MessageData*, nohash<uint64_t>> msgMap;
f.Read( sz );
if( eventMask & EventType::Messages )
{
m_data.messages.reserve( sz );
for( uint64_t i=0; i<sz; i++ )
{
uint64_t ptr;
f.Read( ptr );
auto msgdata = m_slab.Alloc<MessageData>();
f.Read( msgdata, sizeof( MessageData::time ) + sizeof( MessageData::ref ) );
if( fileVer <= FileVersion( 0, 3, 0 ) ) f.Skip( 7 );
m_data.messages.push_back_no_space_check( msgdata );
msgMap.emplace( ptr, msgdata );
}
}
else
{
// Prior to 0.3.1 MessageData was saved with padding.
if( fileVer <= FileVersion( 0, 3, 0 ) )
{
f.Skip( sz * ( sizeof( uint64_t ) + 24 ) );
}
else
{
f.Skip( sz * ( sizeof( uint64_t ) + sizeof( MessageData::time ) + sizeof( MessageData::ref ) ) );
}
}
s_loadProgress.progress.store( LoadProgress::Zones, std::memory_order_relaxed );
f.Read( sz );
m_data.threads.reserve( sz );
for( uint64_t i=0; i<sz; i++ )
{
auto td = m_slab.AllocInit<ThreadData>();
uint64_t tid;
f.Read( tid );
td->id = tid;
f.Read( td->count );
uint64_t tsz;
f.Read( tsz );
s_loadProgress.subTotal.store( td->count, std::memory_order_relaxed );
if( tsz != 0 )
{
if( fileVer <= FileVersion( 0, 3, 2 ) )
{
ReadTimelinePre033( f, td->timeline, CompressThread( tid ), tsz, fileVer );
}
else
{
ReadTimeline( f, td->timeline, CompressThread( tid ), tsz );
}
}
uint64_t msz;
f.Read( msz );
if( eventMask & EventType::Messages )
{
td->messages.reserve( msz );
for( uint64_t j=0; j<msz; j++ )
{
uint64_t ptr;
f.Read( ptr );
auto md = msgMap[ptr];
td->messages.push_back_no_space_check( md );
md->thread = tid;
}
}
else
{
f.Skip( msz * sizeof( uint64_t ) );
}
m_data.threads.push_back_no_space_check( td );
}
#ifndef TRACY_NO_STATISTICS
m_threadZones = std::thread( [this] {
for( auto& v : m_data.sourceLocationZones )
{
auto& zones = v.second.zones;
#ifdef MY_LIBCPP_SUCKS
pdqsort_branchless( zones.begin(), zones.end(), []( const auto& lhs, const auto& rhs ) { return lhs.zone->start < rhs.zone->start; } );
#else
std::sort( std::execution::par_unseq, zones.begin(), zones.end(), []( const auto& lhs, const auto& rhs ) { return lhs.zone->start < rhs.zone->start; } );
#endif
}
std::lock_guard<TracyMutex> lock( m_data.lock );
m_data.sourceLocationZonesReady = true;
} );
#endif
s_loadProgress.progress.store( LoadProgress::GpuZones, std::memory_order_relaxed );
f.Read( sz );
m_data.gpuData.reserve( sz );
for( uint64_t i=0; i<sz; i++ )
{
auto ctx = m_slab.AllocInit<GpuCtxData>();
f.Read( ctx->thread );
f.Read( ctx->accuracyBits );
f.Read( ctx->count );
s_loadProgress.subTotal.store( ctx->count, std::memory_order_relaxed );
s_loadProgress.subProgress.store( 0, std::memory_order_relaxed );
if( fileVer <= FileVersion( 0, 3, 1 ) )
{
ctx->period = 1.f;
uint64_t tsz;
f.Read( tsz );
if( tsz != 0 )
{
ReadTimelinePre032( f, ctx->timeline, tsz );
}
}
else
{
f.Read( ctx->period );
uint64_t tsz;
f.Read( tsz );
if( tsz != 0 )
{
ReadTimeline( f, ctx->timeline, tsz );
}
}
m_data.gpuData.push_back_no_space_check( ctx );
}
s_loadProgress.progress.store( LoadProgress::Plots, std::memory_order_relaxed );
f.Read( sz );
if( eventMask & EventType::Plots )
{
m_data.plots.Data().reserve( sz );
s_loadProgress.subTotal.store( sz, std::memory_order_relaxed );
for( uint64_t i=0; i<sz; i++ )
{
s_loadProgress.subProgress.store( i, std::memory_order_relaxed );
auto pd = m_slab.AllocInit<PlotData>();
pd->type = PlotType::User;
f.Read( pd->name );
f.Read( pd->min );
f.Read( pd->max );
uint64_t psz;
f.Read( psz );
pd->data.reserve_and_use( psz );
f.Read( pd->data.data(), psz * sizeof( PlotItem ) );
m_data.plots.Data().push_back_no_space_check( pd );
}
}
else
{
for( uint64_t i=0; i<sz; i++ )
{
f.Skip( sizeof( PlotData::name ) + sizeof( PlotData::min ) + sizeof( PlotData::max ) );
uint64_t psz;
f.Read( psz );
f.Skip( psz * sizeof( PlotItem ) );
}
}
// Support pre-0.3 traces
if( fileVer == 0 && f.IsEOF() )
{
s_loadProgress.total.store( 0, std::memory_order_relaxed );
return;
}
s_loadProgress.subTotal.store( 0, std::memory_order_relaxed );
s_loadProgress.progress.store( LoadProgress::Memory, std::memory_order_relaxed );
f.Read( sz );
bool reconstructMemAllocPlot = false;
if( eventMask & EventType::Memory )
{
m_data.memory.data.reserve_and_use( sz );
if( fileVer >= FileVersion( 0, 3, 201 ) )
{
uint64_t activeSz, freesSz;
f.Read2( activeSz, freesSz );
m_data.memory.active.reserve( activeSz );
m_data.memory.frees.reserve( freesSz );
}
auto mem = m_data.memory.data.data();
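// Thread ids are stored raw in the file and compressed to 16-bit indices on
// load; allocations with no recorded free (timeFree < 0) go into the active
// map, the rest into the frees list.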
s_loadProgress.subTotal.store( sz, std::memory_order_relaxed );
for( uint64_t i=0; i<sz; i++ )
2018-04-02 00:05:16 +00:00
{
s_loadProgress.subProgress.store( i, std::memory_order_relaxed );
if( fileVer <= FileVersion( 0, 3, 1 ) )
{
f.Read( mem, sizeof( MemEvent::ptr ) + sizeof( MemEvent::size ) + sizeof( MemEvent::timeAlloc ) + sizeof( MemEvent::timeFree ) );
mem->csAlloc = 0;
mem->csFree = 0;
}
else
{
f.Read( mem, sizeof( MemEvent::ptr ) + sizeof( MemEvent::size ) + sizeof( MemEvent::timeAlloc ) + sizeof( MemEvent::timeFree ) + sizeof( MemEvent::csAlloc ) + sizeof( MemEvent::csFree ) );
}
uint64_t t0, t1;
f.Read2( t0, t1 );
mem->threadAlloc = CompressThread( t0 );
if( t0 == t1 )
{
mem->threadFree = mem->threadAlloc;
}
else
{
mem->threadFree = CompressThread( t1 );
}
if( mem->timeFree < 0 )
{
m_data.memory.active.emplace( mem->ptr, i );
}
else
{
m_data.memory.frees.push_back( i );
}
mem++;
}
f.Read( m_data.memory.high );
f.Read( m_data.memory.low );
f.Read( m_data.memory.usage );
if( sz != 0 )
{
reconstructMemAllocPlot = true;
}
}
else
{
if( fileVer >= FileVersion( 0, 3, 201 ) )
{
f.Skip( 2 * sizeof( uint64_t ) );
}
if( fileVer <= FileVersion( 0, 3, 1 ) )
{
f.Skip( sz * (
sizeof( MemEvent::ptr ) +
sizeof( MemEvent::size ) +
sizeof( MemEvent::timeAlloc ) +
sizeof( MemEvent::timeFree ) +
sizeof( uint64_t ) +
sizeof( uint64_t ) ) );
}
else
{
f.Skip( sz * (
sizeof( MemEvent::ptr ) +
sizeof( MemEvent::size ) +
sizeof( MemEvent::timeAlloc ) +
sizeof( MemEvent::timeFree ) +
sizeof( MemEvent::csAlloc ) +
sizeof( MemEvent::csFree ) +
sizeof( uint64_t ) +
sizeof( uint64_t ) ) );
}
f.Skip( sizeof( MemData::high ) + sizeof( MemData::low ) + sizeof( MemData::usage ) );
}
if( fileVer <= FileVersion( 0, 3, 1 ) ) goto finishLoading;
s_loadProgress.subTotal.store( 0, std::memory_order_relaxed );
s_loadProgress.progress.store( LoadProgress::CallStacks, std::memory_order_relaxed );
f.Read( sz );
m_data.callstackPayload.reserve( sz );
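// Each callstack is a single slab allocation: the frame addresses come
// first, with the VarArray header constructed directly after them via
// placement new.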
for( uint64_t i=0; i<sz; i++ )
{
uint8_t csz;
f.Read( csz );
const auto memsize = sizeof( VarArray<uint64_t> ) + csz * sizeof( uint64_t );
auto mem = (char*)m_slab.AllocRaw( memsize );
auto data = (uint64_t*)mem;
f.Read( data, csz * sizeof( uint64_t ) );
auto arr = (VarArray<uint64_t>*)( mem + csz * sizeof( uint64_t ) );
new(arr) VarArray<uint64_t>( csz, data );
m_data.callstackPayload.push_back_no_space_check( arr );
}
f.Read( sz );
m_data.callstackFrameMap.reserve( sz );
for( uint64_t i=0; i<sz; i++ )
{
uint64_t ptr;
f.Read( ptr );
auto frame = m_slab.Alloc<CallstackFrame>();
f.Read( frame, sizeof( CallstackFrame ) );
m_data.callstackFrameMap.emplace( ptr, frame );
}
finishLoading:
if( reconstructMemAllocPlot )
{
m_threadMemory = std::thread( [this] { ReconstructMemAllocPlot(); } );
}
s_loadProgress.total.store( 0, std::memory_order_relaxed );
}
Worker::~Worker()
{
Shutdown();
if( m_thread.joinable() ) m_thread.join();
if( m_threadMemory.joinable() ) m_threadMemory.join();
if( m_threadZones.joinable() ) m_threadZones.join();
delete[] m_buffer;
LZ4_freeStreamDecode( m_stream );
for( auto& v : m_data.threads )
{
v->timeline.~Vector();
v->stack.~Vector();
v->messages.~Vector();
}
for( auto& v : m_data.gpuData )
{
v->timeline.~Vector();
v->stack.~Vector();
}
for( auto& v : m_data.plots.Data() )
2018-04-21 18:12:16 +00:00
{
v->~PlotData();
}
for( auto& v : m_data.frames.Data() )
{
v->~FrameData();
}
}
uint64_t Worker::GetLockCount() const
{
uint64_t cnt = 0;
for( auto& l : m_data.lockMap )
{
cnt += l.second.timeline.size();
}
return cnt;
}
uint64_t Worker::GetPlotCount() const
{
uint64_t cnt = 0;
for( auto& p : m_data.plots.Data() )
{
if( p->type != PlotType::Memory )
{
cnt += p->data.size();
}
}
return cnt;
}
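// A continuous frame set records only start times, so a frame ends where
// the next one begins (or at lastTime for the final frame). A discontinuous
// set stores explicit end times, with -1 marking a still-open frame.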
int64_t Worker::GetFrameTime( const FrameData& fd, size_t idx ) const
{
if( fd.continuous )
{
if( idx < fd.frames.size() - 1 )
{
return fd.frames[idx+1].start - fd.frames[idx].start;
}
else
{
assert( m_data.lastTime != 0 );
return m_data.lastTime - fd.frames.back().start;
}
}
else
{
if( fd.frames[idx].end >= 0 )
{
return fd.frames[idx].end - fd.frames[idx].start;
}
else
{
return m_data.lastTime - fd.frames.back().start;
}
}
}
int64_t Worker::GetFrameBegin( const FrameData& fd, size_t idx ) const
{
assert( idx < fd.frames.size() );
return fd.frames[idx].start;
}
int64_t Worker::GetFrameEnd( const FrameData& fd, size_t idx ) const
{
if( fd.continuous )
{
if( idx < fd.frames.size() - 1 )
{
return fd.frames[idx+1].start;
}
else
{
return m_data.lastTime;
}
}
else
{
if( fd.frames[idx].end >= 0 )
{
return fd.frames[idx].end;
}
else
{
return m_data.lastTime;
}
}
}
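// Returns the index range of frames that may overlap [from, to), widening
// by one at the front so a frame that begins before `from` but spans it is
// included.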
std::pair <int, int> Worker::GetFrameRange( const FrameData& fd, int64_t from, int64_t to )
{
auto zitbegin = std::lower_bound( fd.frames.begin(), fd.frames.end(), from, [] ( const auto& lhs, const auto& rhs ) { return lhs.start < rhs; } );
if( zitbegin == fd.frames.end() ) zitbegin--;
const auto zitend = std::lower_bound( zitbegin, fd.frames.end(), to, [] ( const auto& lhs, const auto& rhs ) { return lhs.start < rhs; } );
int zbegin = std::distance( fd.frames.begin(), zitbegin );
if( zbegin > 0 && zitbegin->start != from ) --zbegin;
const int zend = std::distance( fd.frames.begin(), zitend );
return std::make_pair( zbegin, zend );
}
const CallstackFrame* Worker::GetCallstackFrame( uint64_t ptr ) const
{
auto it = m_data.callstackFrameMap.find( ptr );
if( it == m_data.callstackFrameMap.end() )
{
return nullptr;
}
else
{
return it->second;
}
}
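// If a zone's end has not been recorded yet, approximate it by descending
// into the last child until a recorded end (or a childless zone's start)
// is found.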
int64_t Worker::GetZoneEnd( const ZoneEvent& ev )
{
auto ptr = &ev;
for(;;)
{
if( ptr->end >= 0 ) return ptr->end;
if( ptr->child < 0 ) return ptr->start;
ptr = GetZoneChildren( ptr->child ).back();
}
}
int64_t Worker::GetZoneEnd( const GpuEvent& ev )
{
auto ptr = &ev;
for(;;)
{
if( ptr->gpuEnd >= 0 ) return ptr->gpuEnd;
if( ptr->child < 0 ) return ptr->gpuStart;
ptr = GetGpuChildren( ptr->child ).back();
}
}
const char* Worker::GetString( uint64_t ptr ) const
{
const auto it = m_data.strings.find( ptr );
if( it == m_data.strings.end() || it->second == nullptr )
{
return "???";
}
else
{
return it->second;
}
}
const char* Worker::GetString( const StringRef& ref ) const
{
if( ref.isidx )
{
assert( ref.active );
return m_data.stringData[ref.str];
}
else
{
if( ref.active )
{
return GetString( ref.str );
}
else
{
return "???";
}
}
}
const char* Worker::GetString( const StringIdx& idx ) const
{
assert( idx.active );
return m_data.stringData[idx.idx];
}
const char* Worker::GetThreadString( uint64_t id ) const
{
const auto it = m_data.threadNames.find( id );
if( it == m_data.threadNames.end() )
{
return "???";
}
else
{
return it->second;
}
}
const SourceLocation& Worker::GetSourceLocation( int32_t srcloc ) const
{
if( srcloc < 0 )
{
return *m_data.sourceLocationPayload[-srcloc-1];
}
else
{
const auto it = m_data.sourceLocation.find( m_data.sourceLocationExpand[srcloc] );
assert( it != m_data.sourceLocation.end() );
return it->second;
}
}
const char* Worker::GetZoneName( const ZoneEvent& ev ) const
{
auto& srcloc = GetSourceLocation( ev.srcloc );
return GetZoneName( ev, srcloc );
}
const char* Worker::GetZoneName( const ZoneEvent& ev, const SourceLocation& srcloc ) const
{
if( ev.name.active )
{
return GetString( ev.name );
}
else if( srcloc.name.active )
{
return GetString( srcloc.name );
}
else
{
return GetString( srcloc.function );
}
}
const char* Worker::GetZoneName( const GpuEvent& ev ) const
{
auto& srcloc = GetSourceLocation( ev.srcloc );
return GetZoneName( ev, srcloc );
}
const char* Worker::GetZoneName( const GpuEvent& ev, const SourceLocation& srcloc ) const
{
if( srcloc.name.active )
{
return GetString( srcloc.name );
}
else
{
return GetString( srcloc.function );
}
}
std::vector<int32_t> Worker::GetMatchingSourceLocation( const char* query ) const
{
std::vector<int32_t> match;
const auto sz = m_data.sourceLocationExpand.size();
for( size_t i=1; i<sz; i++ )
{
const auto it = m_data.sourceLocation.find( m_data.sourceLocationExpand[i] );
assert( it != m_data.sourceLocation.end() );
const auto& srcloc = it->second;
const auto str = GetString( srcloc.name.active ? srcloc.name : srcloc.function );
if( strstr( str, query ) != nullptr )
{
match.push_back( (int32_t)i );
}
}
for( auto& srcloc : m_data.sourceLocationPayload )
{
const auto str = GetString( srcloc->name.active ? srcloc->name : srcloc->function );
if( strstr( str, query ) != nullptr )
{
auto it = m_data.sourceLocationPayloadMap.find( srcloc );
assert( it != m_data.sourceLocationPayloadMap.end() );
match.push_back( -int32_t( it->second + 1 ) );
}
}
return match;
}
#ifndef TRACY_NO_STATISTICS
const Worker::SourceLocationZones& Worker::GetZonesForSourceLocation( int32_t srcloc ) const
{
static const SourceLocationZones empty;
auto it = m_data.sourceLocationZones.find( srcloc );
return it != m_data.sourceLocationZones.end() ? it->second : empty;
}
#endif
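// 64-bit thread ids are compressed to small sequential uint16_t indices;
// the most recent lookup is cached in m_data.threadLast.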
uint16_t Worker::CompressThreadReal( uint64_t thread )
{
auto it = m_data.threadMap.find( thread );
if( it != m_data.threadMap.end() )
{
m_data.threadLast.first = thread;
m_data.threadLast.second = it->second;
return it->second;
}
else
{
return CompressThreadNew( thread );
}
}
uint16_t Worker::CompressThreadNew( uint64_t thread )
{
auto sz = m_data.threadExpand.size();
m_data.threadExpand.push_back( thread );
m_data.threadMap.emplace( thread, sz );
m_data.threadLast.first = thread;
m_data.threadLast.second = sz;
return sz;
}
void Worker::Exec()
{
timeval tv;
tv.tv_sec = 0;
tv.tv_usec = 10000;
auto ShouldExit = [this]
{
return m_shutdown.load( std::memory_order_relaxed );
};
auto lz4buf = std::make_unique<char[]>( LZ4Size );
for(;;)
{
if( m_shutdown.load( std::memory_order_relaxed ) ) return;
if( !m_sock.Connect( m_addr.c_str(), "8086" ) ) continue;
std::chrono::time_point<std::chrono::high_resolution_clock> t0;
uint64_t bytes = 0;
uint64_t decBytes = 0;
m_data.framesBase = m_data.frames.Retrieve( 0, [this] ( uint64_t name ) {
auto fd = m_slab.AllocInit<FrameData>();
fd->name = name;
fd->continuous = 1;
return fd;
}, [this] ( uint64_t name ) {
assert( name == 0 );
char tmp[6] = "Frame";
HandleFrameName( name, tmp, 5 );
} );
{
WelcomeMessage welcome;
if( !m_sock.Read( &welcome, sizeof( welcome ), &tv, ShouldExit ) ) goto close;
m_timerMul = welcome.timerMul;
const auto initEnd = TscTime( welcome.initEnd );
m_data.framesBase->frames.push_back( FrameEvent{ TscTime( welcome.initBegin ), -1 } );
m_data.framesBase->frames.push_back( FrameEvent{ initEnd, -1 } );
m_data.lastTime = initEnd;
m_delay = TscTime( welcome.delay );
m_resolution = TscTime( welcome.resolution );
m_onDemand = welcome.onDemand;
char dtmp[64];
time_t date = welcome.epoch;
auto lt = localtime( &date );
strftime( dtmp, 64, "%F %T", lt );
char tmp[1024];
sprintf( tmp, "%s @ %s", welcome.programName, dtmp );
m_captureName = tmp;
if( welcome.onDemand != 0 )
{
OnDemandPayloadMessage onDemand;
if( !m_sock.Read( &onDemand, sizeof( onDemand ), &tv, ShouldExit ) ) goto close;
m_data.frameOffset = onDemand.frames;
}
}
m_hasData.store( true, std::memory_order_release );
LZ4_setStreamDecode( m_stream, nullptr, 0 );
m_connected.store( true, std::memory_order_relaxed );
t0 = std::chrono::high_resolution_clock::now();
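// The receive buffer holds roughly three frames and the write offset wraps
// only after 2*TargetFrameSize, so the previous frame's decompressed output
// remains valid for the LZ4 streaming decoder to reference.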
for(;;)
{
if( m_shutdown.load( std::memory_order_relaxed ) ) return;
auto buf = m_buffer + m_bufferOffset;
lz4sz_t lz4sz;
if( !m_sock.Read( &lz4sz, sizeof( lz4sz ), &tv, ShouldExit ) ) goto close;
if( !m_sock.Read( lz4buf.get(), lz4sz, &tv, ShouldExit ) ) goto close;
bytes += sizeof( lz4sz ) + lz4sz;
auto sz = LZ4_decompress_safe_continue( m_stream, lz4buf.get(), buf, lz4sz, TargetFrameSize );
assert( sz >= 0 );
decBytes += sz;
char* ptr = buf;
const char* end = buf + sz;
{
std::lock_guard<TracyMutex> lock( m_data.lock );
while( ptr < end )
{
auto ev = (const QueueItem*)ptr;
DispatchProcess( *ev, ptr );
}
m_bufferOffset += sz;
if( m_bufferOffset > TargetFrameSize * 2 ) m_bufferOffset = 0;
HandlePostponedPlots();
}
auto t1 = std::chrono::high_resolution_clock::now();
auto td = std::chrono::duration_cast<std::chrono::milliseconds>( t1 - t0 ).count();
enum { MbpsUpdateTime = 200 };
if( td > MbpsUpdateTime )
{
std::lock_guard<TracyMutex> lock( m_mbpsData.lock );
m_mbpsData.mbps.erase( m_mbpsData.mbps.begin() );
m_mbpsData.mbps.emplace_back( bytes / ( td * 125.f ) );
m_mbpsData.compRatio = float( bytes ) / decBytes;
t0 = t1;
bytes = 0;
decBytes = 0;
}
if( m_terminate )
{
if( m_pendingStrings != 0 || m_pendingThreads != 0 || m_pendingSourceLocation != 0 || m_pendingCallstackFrames != 0 ||
!m_pendingCustomStrings.empty() || m_data.plots.IsPending() || !m_pendingCallstacks.empty() )
{
continue;
}
bool done = true;
for( auto& v : m_data.threads )
{
if( !v->stack.empty() )
{
done = false;
break;
}
}
if( !done ) continue;
ServerQuery( ServerQueryTerminate, 0 );
break;
}
}
close:
m_sock.Close();
m_connected.store( false, std::memory_order_relaxed );
}
}
void Worker::ServerQuery( uint8_t type, uint64_t data )
{
enum { DataSize = sizeof( type ) + sizeof( data ) };
char tmp[DataSize];
memcpy( tmp, &type, sizeof( type ) );
memcpy( tmp + sizeof( type ), &data, sizeof( data ) );
m_sock.Send( tmp, DataSize );
}
void Worker::DispatchProcess( const QueueItem& ev, char*& ptr )
{
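// Items at or above QueueType::StringData carry a 16-bit payload size plus
// inline string data; all other items have a fixed size given by
// QueueDataSize.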
if( ev.hdr.idx >= (int)QueueType::StringData )
{
ptr += sizeof( QueueHeader ) + sizeof( QueueStringTransfer );
uint16_t sz;
memcpy( &sz, ptr, sizeof( sz ) );
ptr += sizeof( sz );
switch( ev.hdr.type )
{
case QueueType::CustomStringData:
AddCustomString( ev.stringTransfer.ptr, ptr, sz );
break;
case QueueType::StringData:
AddString( ev.stringTransfer.ptr, ptr, sz );
break;
case QueueType::ThreadName:
AddThreadString( ev.stringTransfer.ptr, ptr, sz );
break;
case QueueType::PlotName:
HandlePlotName( ev.stringTransfer.ptr, ptr, sz );
break;
case QueueType::SourceLocationPayload:
AddSourceLocationPayload( ev.stringTransfer.ptr, ptr, sz );
break;
case QueueType::CallstackPayload:
2018-06-19 19:15:36 +00:00
AddCallstackPayload( ev.stringTransfer.ptr, ptr, sz );
break;
case QueueType::FrameName:
HandleFrameName( ev.stringTransfer.ptr, ptr, sz );
break;
default:
assert( false );
break;
}
ptr += sz;
}
else
{
ptr += QueueDataSize[ev.hdr.idx];
Process( ev );
}
}
void Worker::CheckSourceLocation( uint64_t ptr )
{
if( m_data.sourceLocation.find( ptr ) == m_data.sourceLocation.end() )
{
NewSourceLocation( ptr );
}
}
void Worker::NewSourceLocation( uint64_t ptr )
{
static const SourceLocation emptySourceLocation = {};
m_data.sourceLocation.emplace( ptr, emptySourceLocation );
m_pendingSourceLocation++;
m_sourceLocationQueue.push_back( ptr );
ServerQuery( ServerQuerySourceLocation, ptr );
}
uint32_t Worker::ShrinkSourceLocation( uint64_t srcloc )
{
auto it = m_sourceLocationShrink.find( srcloc );
if( it != m_sourceLocationShrink.end() )
{
return it->second;
}
else
{
return NewShrinkedSourceLocation( srcloc );
}
}
uint32_t Worker::NewShrinkedSourceLocation( uint64_t srcloc )
{
const auto sz = m_data.sourceLocationExpand.size();
m_data.sourceLocationExpand.push_back( srcloc );
#ifndef TRACY_NO_STATISTICS
m_data.sourceLocationZones.emplace( sz, SourceLocationZones() );
#else
m_data.sourceLocationZonesCnt.emplace( sz, 0 );
#endif
m_sourceLocationShrink.emplace( srcloc, sz );
return sz;
}
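// Messages usually arrive in time order, so appending is the common case;
// out-of-order messages fall back to a binary-search insertion.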
void Worker::InsertMessageData( MessageData* msg, uint64_t thread )
{
if( m_data.messages.empty() )
{
m_data.messages.push_back( msg );
}
else if( m_data.messages.back()->time < msg->time )
{
m_data.messages.push_back_non_empty( msg );
}
else
{
auto mit = std::lower_bound( m_data.messages.begin(), m_data.messages.end(), msg->time, [] ( const auto& lhs, const auto& rhs ) { return lhs->time < rhs; } );
m_data.messages.insert( mit, msg );
}
auto vec = &NoticeThread( thread )->messages;
if( vec->empty() )
{
vec->push_back( msg );
}
else if( vec->back()->time < msg->time )
{
vec->push_back_non_empty( msg );
}
else
{
auto tmit = std::lower_bound( vec->begin(), vec->end(), msg->time, [] ( const auto& lhs, const auto& rhs ) { return lhs->time < rhs; } );
vec->insert( tmit, msg );
}
}
ThreadData* Worker::NoticeThread( uint64_t thread )
{
auto it = m_threadMap.find( thread );
if( it != m_threadMap.end() )
{
return it->second;
}
else
{
return NewThread( thread );
}
}
ThreadData* Worker::NewThread( uint64_t thread )
{
CheckThreadString( thread );
auto td = m_slab.AllocInit<ThreadData>();
td->id = thread;
td->count = 0;
m_data.threads.push_back( td );
m_threadMap.emplace( thread, td );
return td;
}
void Worker::NewZone( ZoneEvent* zone, uint64_t thread )
{
m_data.zonesCnt++;
#ifndef TRACY_NO_STATISTICS
auto it = m_data.sourceLocationZones.find( zone->srcloc );
assert( it != m_data.sourceLocationZones.end() );
it->second.zones.push_back( ZoneThreadData { zone, CompressThread( thread ) } );
#else
auto it = m_data.sourceLocationZonesCnt.find( zone->srcloc );
assert( it != m_data.sourceLocationZonesCnt.end() );
it->second++;
#endif
auto td = NoticeThread( thread );
td->count++;
if( td->stack.empty() )
{
td->stack.push_back( zone );
td->timeline.push_back( zone );
}
else
{
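// Child zones live in the shared m_zoneChildren pool; a negative child
// index means the zone has no children yet.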
auto back = td->stack.back();
if( back->child < 0 )
{
back->child = int32_t( m_data.m_zoneChildren.size() );
m_data.m_zoneChildren.push_back( Vector<ZoneEvent*>( zone ) );
}
else
{
m_data.m_zoneChildren[back->child].push_back( zone );
}
td->stack.push_back_non_empty( zone );
}
}
void Worker::InsertLockEvent( LockMap& lockmap, LockEvent* lev, uint64_t thread )
{
m_data.lastTime = std::max( m_data.lastTime, lev->time );
NoticeThread( thread );
auto it = lockmap.threadMap.find( thread );
if( it == lockmap.threadMap.end() )
{
assert( lockmap.threadList.size() < MaxLockThreads );
it = lockmap.threadMap.emplace( thread, lockmap.threadList.size() ).first;
lockmap.threadList.emplace_back( thread );
}
lev->thread = it->second;
assert( lev->thread == it->second );
auto& timeline = lockmap.timeline;
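    // Keep the lock timeline sorted by time. Events usually arrive in order,
    // so the common case is a cheap append; otherwise binary-search for the
    // insertion point and recompute the lock state from there onwards.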
if( timeline.empty() )
{
timeline.push_back( lev );
UpdateLockCount( lockmap, timeline.size() - 1 );
}
else if( timeline.back()->time < lev->time )
{
timeline.push_back_non_empty( lev );
UpdateLockCount( lockmap, timeline.size() - 1 );
}
else
{
auto it = std::lower_bound( timeline.begin(), timeline.end(), lev->time, [] ( const auto& lhs, const auto& rhs ) { return lhs->time < rhs; } );
it = timeline.insert( it, lev );
UpdateLockCount( lockmap, std::distance( timeline.begin(), it ) );
}
}
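// String resolution handshake: the Check*() functions below register a "???"
// placeholder and ask the client for the actual data; the matching Add*()
// handler replaces the placeholder once the server query is answered.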
void Worker::CheckString( uint64_t ptr )
{
if( ptr == 0 ) return;
if( m_data.strings.find( ptr ) != m_data.strings.end() ) return;
m_data.strings.emplace( ptr, "???" );
m_pendingStrings++;
ServerQuery( ServerQueryString, ptr );
}
void Worker::CheckThreadString( uint64_t id )
{
if( m_data.threadNames.find( id ) != m_data.threadNames.end() ) return;
m_data.threadNames.emplace( id, "???" );
m_pendingThreads++;
ServerQuery( ServerQueryThreadString, id );
}
void Worker::AddSourceLocation( const QueueSourceLocation& srcloc )
{
assert( m_pendingSourceLocation > 0 );
m_pendingSourceLocation--;
const auto ptr = m_sourceLocationQueue.front();
m_sourceLocationQueue.erase( m_sourceLocationQueue.begin() );
auto it = m_data.sourceLocation.find( ptr );
assert( it != m_data.sourceLocation.end() );
CheckString( srcloc.name );
CheckString( srcloc.file );
CheckString( srcloc.function );
uint32_t color = ( srcloc.r << 16 ) | ( srcloc.g << 8 ) | srcloc.b;
it->second = SourceLocation { srcloc.name == 0 ? StringRef() : StringRef( StringRef::Ptr, srcloc.name ), StringRef( StringRef::Ptr, srcloc.function ), StringRef( StringRef::Ptr, srcloc.file ), srcloc.line, color };
}
void Worker::AddSourceLocationPayload( uint64_t ptr, char* data, size_t sz )
{
const auto start = data;
assert( m_pendingSourceLocationPayload.find( ptr ) == m_pendingSourceLocationPayload.end() );
uint32_t color, line;
memcpy( &color, data, 4 );
memcpy( &line, data + 4, 4 );
data += 8;
auto end = data;
while( *end ) end++;
const auto func = StoreString( data, end - data );
end++;
data = end;
while( *end ) end++;
const auto source = StoreString( data, end - data );
end++;
const auto nsz = sz - ( end - start );
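    // Swap the first and third color channels; the client presumably sends
    // the color in the opposite byte order to the 0xRRGGBB layout built in
    // AddSourceLocation() above.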
color = ( ( color & 0x00FF0000 ) >> 16 ) |
( ( color & 0x0000FF00 ) ) |
( ( color & 0x000000FF ) << 16 );
SourceLocation srcloc { nsz == 0 ? StringRef() : StringRef( StringRef::Idx, StoreString( end, nsz ).idx ), StringRef( StringRef::Idx, func.idx ), StringRef( StringRef::Idx, source.idx ), line, color };
auto it = m_data.sourceLocationPayloadMap.find( &srcloc );
if( it == m_data.sourceLocationPayloadMap.end() )
{
auto slptr = m_slab.Alloc<SourceLocation>();
memcpy( slptr, &srcloc, sizeof( srcloc ) );
uint32_t idx = m_data.sourceLocationPayload.size();
m_data.sourceLocationPayloadMap.emplace( slptr, idx );
m_pendingSourceLocationPayload.emplace( ptr, -int32_t( idx + 1 ) );
m_data.sourceLocationPayload.push_back( slptr );
#ifndef TRACY_NO_STATISTICS
m_data.sourceLocationZones.emplace( -int32_t( idx + 1 ), SourceLocationZones() );
#else
m_data.sourceLocationZonesCnt.emplace( -int32_t( idx + 1 ), 0 );
#endif
}
else
{
m_pendingSourceLocationPayload.emplace( ptr, -int32_t( it->second + 1 ) );
}
}
void Worker::AddString( uint64_t ptr, char* str, size_t sz )
{
assert( m_pendingStrings > 0 );
m_pendingStrings--;
auto it = m_data.strings.find( ptr );
assert( it != m_data.strings.end() && strcmp( it->second, "???" ) == 0 );
const auto sl = StoreString( str, sz );
it->second = sl.ptr;
}
void Worker::AddThreadString( uint64_t id, char* str, size_t sz )
{
assert( m_pendingThreads > 0 );
m_pendingThreads--;
auto it = m_data.threadNames.find( id );
assert( it != m_data.threadNames.end() && strcmp( it->second, "???" ) == 0 );
const auto sl = StoreString( str, sz );
it->second = sl.ptr;
}
void Worker::AddCustomString( uint64_t ptr, char* str, size_t sz )
{
assert( m_pendingCustomStrings.find( ptr ) == m_pendingCustomStrings.end() );
m_pendingCustomStrings.emplace( ptr, StoreString( str, sz ) );
}
void Worker::AddCallstackPayload( uint64_t ptr, char* _data, size_t sz )
{
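    // The callstack is stored as a single slab allocation: the raw frame
    // pointers first, immediately followed by a VarArray header that points
    // back at them. On a dedup hit the whole block can be released again
    // with one Unalloc() call below.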
assert( m_pendingCallstacks.find( ptr ) == m_pendingCallstacks.end() );
const auto memsize = sizeof( VarArray<uint64_t> ) + sz;
auto mem = (char*)m_slab.AllocRaw( memsize );
auto data = (uint64_t*)mem;
memcpy( data, _data, sz );
auto arr = (VarArray<uint64_t>*)( mem + sz );
new(arr) VarArray<uint64_t>( sz / sizeof( uint64_t ), data );
uint32_t idx;
auto it = m_data.callstackMap.find( arr );
if( it == m_data.callstackMap.end() )
{
idx = m_data.callstackPayload.size();
m_data.callstackMap.emplace( arr, idx );
m_data.callstackPayload.push_back( arr );
for( auto& frame : *arr )
{
auto fit = m_data.callstackFrameMap.find( frame );
if( fit == m_data.callstackFrameMap.end() )
{
m_pendingCallstackFrames++;
ServerQuery( ServerQueryCallstackFrame, frame );
}
}
}
else
{
idx = it->second;
m_slab.Unalloc( memsize );
}
m_pendingCallstacks.emplace( ptr, idx );
}
void Worker::InsertPlot( PlotData* plot, int64_t time, double val )
{
if( plot->data.empty() )
{
plot->min = val;
plot->max = val;
plot->data.push_back( { time, val } );
}
else if( plot->data.back().time < time )
{
if( plot->min > val ) plot->min = val;
else if( plot->max < val ) plot->max = val;
plot->data.push_back_non_empty( { time, val } );
}
else
{
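        // Out-of-order data point. Buffer it in the postpone list instead of
        // inserting immediately; HandlePostponedPlots() merges the buffered
        // points in bulk once they have settled for 100 ms.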
if( plot->min > val ) plot->min = val;
else if( plot->max < val ) plot->max = val;
if( plot->postpone.empty() )
{
plot->postponeTime = std::chrono::duration_cast<std::chrono::milliseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
plot->postpone.push_back( { time, val } );
}
else
{
plot->postpone.push_back_non_empty( { time, val } );
}
}
}
void Worker::HandlePlotName( uint64_t name, char* str, size_t sz )
{
const auto sl = StoreString( str, sz );
m_data.plots.StringDiscovered( name, sl, m_data.strings, [this] ( PlotData* dst, PlotData* src ) {
for( auto& v : src->data )
{
InsertPlot( dst, v.time, v.val );
}
} );
}
void Worker::HandleFrameName( uint64_t name, char* str, size_t sz )
{
const auto sl = StoreString( str, sz );
m_data.frames.StringDiscovered( name, sl, m_data.strings, [this] ( FrameData* dst, FrameData* src ) {
auto sz = dst->frames.size();
dst->frames.insert( dst->frames.end(), src->frames.begin(), src->frames.end() );
std::inplace_merge( dst->frames.begin(), dst->frames.begin() + sz, dst->frames.end(), [] ( const auto& lhs, const auto& rhs ) { return lhs.start < rhs.start; } );
} );
}
void Worker::HandlePostponedPlots()
{
for( auto& plot : m_data.plots.Data() )
{
auto& src = plot->postpone;
if( src.empty() ) continue;
if( std::chrono::duration_cast<std::chrono::milliseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count() - plot->postponeTime < 100 ) continue;
auto& dst = plot->data;
#ifdef MY_LIBCPP_SUCKS
pdqsort_branchless( src.begin(), src.end(), [] ( const auto& l, const auto& r ) { return l.time < r.time; } );
#else
std::sort( std::execution::par_unseq, src.begin(), src.end(), [] ( const auto& l, const auto& r ) { return l.time < r.time; } );
#endif
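        // Splice the sorted postponed points into the destination: locate
        // the affected range with two binary searches, insert the new points
        // after it, then let inplace_merge restore global time order without
        // re-sorting the whole plot.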
const auto ds = std::lower_bound( dst.begin(), dst.end(), src.front().time, [] ( const auto& l, const auto& r ) { return l.time < r; } );
        const auto dsd = std::distance( dst.begin(), ds );
const auto de = std::lower_bound( ds, dst.end(), src.back().time, [] ( const auto& l, const auto& r ) { return l.time < r; } );
const auto ded = std::distance( dst.begin(), de );
dst.insert( de, src.begin(), src.end() );
std::inplace_merge( dst.begin() + dsd, dst.begin() + ded, dst.begin() + ded + src.size(), [] ( const auto& l, const auto& r ) { return l.time < r.time; } );
src.clear();
}
}
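// StoreString() interns a character range that is not necessarily
// NUL-terminated: it temporarily writes a terminator into the caller's
// buffer, looks the string up in m_data.stringMap, copies it into slab
// memory only on a miss, and restores the overwritten byte before
// returning. A minimal usage sketch (hypothetical caller; the range must
// have one writable byte past its end, which the network buffers used here
// guarantee):
//
//   char buf[] = "frame update";
//   const auto loc = StoreString( buf, 5 );   // interns just "frame"
//   // loc.ptr -> interned copy, loc.idx -> index into m_data.stringData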
StringLocation Worker::StoreString( char* str, size_t sz )
{
StringLocation ret;
const char backup = str[sz];
str[sz] = '\0';
auto sit = m_data.stringMap.find( str );
if( sit == m_data.stringMap.end() )
{
auto ptr = m_slab.Alloc<char>( sz+1 );
memcpy( ptr, str, sz );
ptr[sz] = '\0';
ret.ptr = ptr;
ret.idx = m_data.stringData.size();
m_data.stringMap.emplace( ptr, m_data.stringData.size() );
m_data.stringData.push_back( ptr );
}
else
{
ret.ptr = sit->first;
ret.idx = sit->second;
}
str[sz] = backup;
return ret;
}
void Worker::Process( const QueueItem& ev )
{
switch( ev.hdr.type )
{
case QueueType::ZoneBegin:
ProcessZoneBegin( ev.zoneBegin );
break;
case QueueType::ZoneBeginCallstack:
ProcessZoneBeginCallstack( ev.zoneBegin );
break;
case QueueType::ZoneBeginAllocSrcLoc:
ProcessZoneBeginAllocSrcLoc( ev.zoneBegin );
break;
case QueueType::ZoneEnd:
ProcessZoneEnd( ev.zoneEnd );
break;
case QueueType::FrameMarkMsg:
ProcessFrameMark( ev.frameMark );
break;
case QueueType::FrameMarkMsgStart:
ProcessFrameMarkStart( ev.frameMark );
break;
case QueueType::FrameMarkMsgEnd:
ProcessFrameMarkEnd( ev.frameMark );
break;
case QueueType::SourceLocation:
AddSourceLocation( ev.srcloc );
break;
case QueueType::ZoneText:
ProcessZoneText( ev.zoneText );
break;
case QueueType::ZoneName:
ProcessZoneName( ev.zoneText );
break;
case QueueType::LockAnnounce:
ProcessLockAnnounce( ev.lockAnnounce );
break;
case QueueType::LockWait:
ProcessLockWait( ev.lockWait );
break;
case QueueType::LockObtain:
ProcessLockObtain( ev.lockObtain );
break;
case QueueType::LockRelease:
ProcessLockRelease( ev.lockRelease );
break;
case QueueType::LockSharedWait:
ProcessLockSharedWait( ev.lockWait );
break;
case QueueType::LockSharedObtain:
ProcessLockSharedObtain( ev.lockObtain );
break;
case QueueType::LockSharedRelease:
ProcessLockSharedRelease( ev.lockRelease );
break;
case QueueType::LockMark:
ProcessLockMark( ev.lockMark );
break;
case QueueType::PlotData:
ProcessPlotData( ev.plotData );
break;
case QueueType::Message:
ProcessMessage( ev.message );
break;
case QueueType::MessageLiteral:
ProcessMessageLiteral( ev.message );
break;
case QueueType::GpuNewContext:
ProcessGpuNewContext( ev.gpuNewContext );
break;
case QueueType::GpuZoneBegin:
ProcessGpuZoneBegin( ev.gpuZoneBegin );
break;
case QueueType::GpuZoneBeginCallstack:
ProcessGpuZoneBeginCallstack( ev.gpuZoneBegin );
break;
case QueueType::GpuZoneEnd:
ProcessGpuZoneEnd( ev.gpuZoneEnd );
break;
case QueueType::GpuTime:
ProcessGpuTime( ev.gpuTime );
break;
case QueueType::MemAlloc:
ProcessMemAlloc( ev.memAlloc );
break;
case QueueType::MemFree:
ProcessMemFree( ev.memFree );
break;
case QueueType::MemAllocCallstack:
ProcessMemAllocCallstack( ev.memAlloc );
break;
case QueueType::MemFreeCallstack:
ProcessMemFreeCallstack( ev.memFree );
break;
case QueueType::CallstackMemory:
ProcessCallstackMemory( ev.callstackMemory );
break;
case QueueType::Callstack:
ProcessCallstack( ev.callstack );
break;
case QueueType::CallstackFrame:
ProcessCallstackFrame( ev.callstackFrame );
break;
case QueueType::Terminate:
m_terminate = true;
break;
case QueueType::KeepAlive:
break;
default:
assert( false );
break;
}
}
void Worker::ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev )
{
CheckSourceLocation( ev.srcloc );
zone->start = TscTime( ev.time );
zone->end = -1;
zone->srcloc = ShrinkSourceLocation( ev.srcloc );
assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
zone->cpu_start = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
zone->callstack = 0;
zone->child = -1;
m_data.lastTime = std::max( m_data.lastTime, zone->start );
NewZone( zone, ev.thread );
}
void Worker::ProcessZoneBegin( const QueueZoneBegin& ev )
{
auto zone = m_slab.AllocInit<ZoneEvent>();
ProcessZoneBeginImpl( zone, ev );
}
void Worker::ProcessZoneBeginCallstack( const QueueZoneBegin& ev )
{
auto zone = m_slab.AllocInit<ZoneEvent>();
ProcessZoneBeginImpl( zone, ev );
auto& next = m_nextCallstack[ev.thread];
next.type = NextCallstackType::Zone;
next.zone = zone;
}
void Worker::ProcessZoneBeginAllocSrcLoc( const QueueZoneBegin& ev )
{
auto it = m_pendingSourceLocationPayload.find( ev.srcloc );
assert( it != m_pendingSourceLocationPayload.end() );
auto zone = m_slab.AllocInit<ZoneEvent>();
zone->start = TscTime( ev.time );
zone->end = -1;
zone->srcloc = it->second;
assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
zone->cpu_start = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
zone->callstack = 0;
zone->child = -1;
m_data.lastTime = std::max( m_data.lastTime, zone->start );
NewZone( zone, ev.thread );
m_pendingSourceLocationPayload.erase( it );
}
void Worker::ProcessZoneEnd( const QueueZoneEnd& ev )
{
auto tit = m_threadMap.find( ev.thread );
assert( tit != m_threadMap.end() );
auto td = tit->second;
auto& stack = td->stack;
assert( !stack.empty() );
auto zone = stack.back_and_pop();
assert( zone->end == -1 );
zone->end = TscTime( ev.time );
assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
zone->cpu_end = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
assert( zone->end >= zone->start );
m_data.lastTime = std::max( m_data.lastTime, zone->end );
#ifndef TRACY_NO_STATISTICS
auto timeSpan = zone->end - zone->start;
if( timeSpan > 0 )
{
auto it = m_data.sourceLocationZones.find( zone->srcloc );
assert( it != m_data.sourceLocationZones.end() );
it->second.min = std::min( it->second.min, timeSpan );
it->second.max = std::max( it->second.max, timeSpan );
it->second.total += timeSpan;
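        // Self time: subtract the (clamped) spans of direct children from
        // this zone's span before accumulating it into selfTotal.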
if( zone->child >= 0 )
{
for( auto& v : GetZoneChildren( zone->child ) )
{
const auto childSpan = std::max( int64_t( 0 ), v->end - v->start );
timeSpan -= childSpan;
}
}
it->second.selfTotal += timeSpan;
}
#endif
}
void Worker::ProcessFrameMark( const QueueFrameMark& ev )
{
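    // Retrieve() returns the FrameData registered under this name, using
    // the first callback to create it on first sight and the second to ask
    // the client for the name string.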
auto fd = m_data.frames.Retrieve( ev.name, [this] ( uint64_t name ) {
auto fd = m_slab.AllocInit<FrameData>();
fd->name = name;
fd->continuous = 1;
return fd;
}, [this] ( uint64_t name ) {
ServerQuery( ServerQueryFrameName, name );
} );
assert( fd->continuous == 1 );
const auto time = TscTime( ev.time );
assert( fd->frames.empty() || fd->frames.back().start < time );
fd->frames.push_back( FrameEvent{ time, -1 } );
m_data.lastTime = std::max( m_data.lastTime, time );
}
void Worker::ProcessFrameMarkStart( const QueueFrameMark& ev )
{
auto fd = m_data.frames.Retrieve( ev.name, [this] ( uint64_t name ) {
auto fd = m_slab.AllocInit<FrameData>();
fd->name = name;
fd->continuous = 0;
return fd;
}, [this] ( uint64_t name ) {
ServerQuery( ServerQueryFrameName, name );
} );
assert( fd->continuous == 0 );
const auto time = TscTime( ev.time );
assert( fd->frames.empty() || ( fd->frames.back().end < time && fd->frames.back().end != -1 ) );
fd->frames.push_back( FrameEvent{ time, -1 } );
m_data.lastTime = std::max( m_data.lastTime, time );
}
void Worker::ProcessFrameMarkEnd( const QueueFrameMark& ev )
{
auto fd = m_data.frames.Retrieve( ev.name, [this] ( uint64_t name ) {
auto fd = m_slab.AllocInit<FrameData>();
fd->name = name;
fd->continuous = 0;
return fd;
}, [this] ( uint64_t name ) {
ServerQuery( ServerQueryFrameName, name );
} );
assert( fd->continuous == 0 );
const auto time = TscTime( ev.time );
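    // In on-demand mode the connection may start between a frame's begin
    // and end, so an end event without a matching start is silently dropped.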
if( fd->frames.empty() )
{
assert( m_onDemand );
return;
}
assert( fd->frames.back().end == -1 );
fd->frames.back().end = time;
m_data.lastTime = std::max( m_data.lastTime, time );
}
void Worker::ProcessZoneText( const QueueZoneText& ev )
{
auto tit = m_threadMap.find( ev.thread );
assert( tit != m_threadMap.end() );
auto td = tit->second;
auto& stack = td->stack;
assert( !stack.empty() );
auto zone = stack.back();
auto it = m_pendingCustomStrings.find( ev.text );
assert( it != m_pendingCustomStrings.end() );
zone->text = StringIdx( it->second.idx );
m_pendingCustomStrings.erase( it );
}
void Worker::ProcessZoneName( const QueueZoneText& ev )
{
auto tit = m_threadMap.find( ev.thread );
assert( tit != m_threadMap.end() );
auto td = tit->second;
auto& stack = td->stack;
assert( !stack.empty() );
auto zone = stack.back();
auto it = m_pendingCustomStrings.find( ev.text );
assert( it != m_pendingCustomStrings.end() );
zone->name = StringIdx( it->second.idx );
m_pendingCustomStrings.erase( it );
}
void Worker::ProcessLockAnnounce( const QueueLockAnnounce& ev )
{
auto it = m_data.lockMap.find( ev.id );
if( it == m_data.lockMap.end() )
{
LockMap lm;
lm.srcloc = ShrinkSourceLocation( ev.lckloc );
lm.type = ev.type;
lm.valid = true;
m_data.lockMap.emplace( ev.id, std::move( lm ) );
}
else
{
it->second.srcloc = ShrinkSourceLocation( ev.lckloc );
assert( it->second.type == ev.type );
it->second.valid = true;
}
CheckSourceLocation( ev.lckloc );
}
void Worker::ProcessLockWait( const QueueLockWait& ev )
{
auto it = m_data.lockMap.find( ev.id );
if( it == m_data.lockMap.end() )
{
LockMap lm;
lm.valid = false;
lm.type = ev.type;
it = m_data.lockMap.emplace( ev.id, std::move( lm ) ).first;
}
auto lev = ev.type == LockType::Lockable ? m_slab.Alloc<LockEvent>() : m_slab.Alloc<LockEventShared>();
lev->time = TscTime( ev.time );
lev->type = LockEvent::Type::Wait;
lev->srcloc = 0;
InsertLockEvent( it->second, lev, ev.thread );
}
void Worker::ProcessLockObtain( const QueueLockObtain& ev )
{
assert( m_data.lockMap.find( ev.id ) != m_data.lockMap.end() );
auto& lock = m_data.lockMap[ev.id];
auto lev = lock.type == LockType::Lockable ? m_slab.Alloc<LockEvent>() : m_slab.Alloc<LockEventShared>();
lev->time = TscTime( ev.time );
lev->type = LockEvent::Type::Obtain;
lev->srcloc = 0;
InsertLockEvent( lock, lev, ev.thread );
}
void Worker::ProcessLockRelease( const QueueLockRelease& ev )
{
assert( m_data.lockMap.find( ev.id ) != m_data.lockMap.end() );
auto& lock = m_data.lockMap[ev.id];
auto lev = lock.type == LockType::Lockable ? m_slab.Alloc<LockEvent>() : m_slab.Alloc<LockEventShared>();
lev->time = TscTime( ev.time );
lev->type = LockEvent::Type::Release;
lev->srcloc = 0;
InsertLockEvent( lock, lev, ev.thread );
}
void Worker::ProcessLockSharedWait( const QueueLockWait& ev )
{
auto it = m_data.lockMap.find( ev.id );
if( it == m_data.lockMap.end() )
{
LockMap lm;
lm.valid = false;
lm.type = ev.type;
it = m_data.lockMap.emplace( ev.id, std::move( lm ) ).first;
}
assert( ev.type == LockType::SharedLockable );
auto lev = m_slab.Alloc<LockEventShared>();
lev->time = TscTime( ev.time );
lev->type = LockEvent::Type::WaitShared;
lev->srcloc = 0;
InsertLockEvent( it->second, lev, ev.thread );
}
void Worker::ProcessLockSharedObtain( const QueueLockObtain& ev )
{
assert( m_data.lockMap.find( ev.id ) != m_data.lockMap.end() );
auto& lock = m_data.lockMap[ev.id];
assert( lock.type == LockType::SharedLockable );
auto lev = m_slab.Alloc<LockEventShared>();
lev->time = TscTime( ev.time );
lev->type = LockEvent::Type::ObtainShared;
lev->srcloc = 0;
InsertLockEvent( lock, lev, ev.thread );
}
void Worker::ProcessLockSharedRelease( const QueueLockRelease& ev )
{
assert( m_data.lockMap.find( ev.id ) != m_data.lockMap.end() );
auto& lock = m_data.lockMap[ev.id];
assert( lock.type == LockType::SharedLockable );
auto lev = m_slab.Alloc<LockEventShared>();
lev->time = TscTime( ev.time );
lev->type = LockEvent::Type::ReleaseShared;
lev->srcloc = 0;
InsertLockEvent( lock, lev, ev.thread );
}
void Worker::ProcessLockMark( const QueueLockMark& ev )
{
CheckSourceLocation( ev.srcloc );
auto lit = m_data.lockMap.find( ev.id );
assert( lit != m_data.lockMap.end() );
auto& lockmap = lit->second;
auto tid = lockmap.threadMap.find( ev.thread );
assert( tid != lockmap.threadMap.end() );
const auto thread = tid->second;
auto it = lockmap.timeline.end();
for(;;)
{
--it;
if( (*it)->thread == thread )
{
switch( (*it)->type )
{
case LockEvent::Type::Obtain:
case LockEvent::Type::ObtainShared:
case LockEvent::Type::Wait:
case LockEvent::Type::WaitShared:
(*it)->srcloc = ShrinkSourceLocation( ev.srcloc );
return;
default:
break;
}
}
}
}
void Worker::ProcessPlotData( const QueuePlotData& ev )
{
PlotData* plot = m_data.plots.Retrieve( ev.name, [this] ( uint64_t name ) {
auto plot = m_slab.AllocInit<PlotData>();
plot->name = name;
plot->type = PlotType::User;
return plot;
}, [this]( uint64_t name ) {
ServerQuery( ServerQueryPlotName, name );
} );
const auto time = TscTime( ev.time );
m_data.lastTime = std::max( m_data.lastTime, time );
switch( ev.type )
{
case PlotDataType::Double:
InsertPlot( plot, time, ev.data.d );
break;
case PlotDataType::Float:
InsertPlot( plot, time, (double)ev.data.f );
break;
case PlotDataType::Int:
InsertPlot( plot, time, (double)ev.data.i );
break;
default:
assert( false );
break;
}
}
void Worker::ProcessMessage( const QueueMessage& ev )
{
auto it = m_pendingCustomStrings.find( ev.text );
assert( it != m_pendingCustomStrings.end() );
auto msg = m_slab.Alloc<MessageData>();
msg->time = TscTime( ev.time );
msg->ref = StringRef( StringRef::Type::Idx, it->second.idx );
msg->thread = ev.thread;
m_data.lastTime = std::max( m_data.lastTime, msg->time );
InsertMessageData( msg, ev.thread );
m_pendingCustomStrings.erase( it );
}
void Worker::ProcessMessageLiteral( const QueueMessage& ev )
{
CheckString( ev.text );
auto msg = m_slab.Alloc<MessageData>();
msg->time = TscTime( ev.time );
msg->ref = StringRef( StringRef::Type::Ptr, ev.text );
msg->thread = ev.thread;
m_data.lastTime = std::max( m_data.lastTime, msg->time );
InsertMessageData( msg, ev.thread );
}
void Worker::ProcessGpuNewContext( const QueueGpuNewContext& ev )
{
assert( !m_gpuCtxMap[ev.context] );
int64_t gpuTime;
if( ev.period == 1.f )
{
gpuTime = ev.gpuTime;
}
else
{
gpuTime = int64_t( double( ev.period ) * ev.gpuTime ); // precision loss
}
auto gpu = m_slab.AllocInit<GpuCtxData>();
memset( gpu->query, 0, sizeof( gpu->query ) );
gpu->timeDiff = TscTime( ev.cpuTime ) - gpuTime;
gpu->thread = ev.thread;
gpu->accuracyBits = ev.accuracyBits;
gpu->period = ev.period;
gpu->count = 0;
m_data.gpuData.push_back( gpu );
m_gpuCtxMap[ev.context] = gpu;
}
void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& ev )
{
auto ctx = m_gpuCtxMap[ev.context];
assert( ctx );
CheckSourceLocation( ev.srcloc );
zone->cpuStart = TscTime( ev.cpuTime );
zone->cpuEnd = -1;
zone->gpuStart = std::numeric_limits<int64_t>::max();
zone->gpuEnd = -1;
zone->srcloc = ShrinkSourceLocation( ev.srcloc );
zone->callstack = 0;
zone->child = -1;
if( ctx->thread == 0 )
{
// Vulkan context is not bound to any single thread.
zone->thread = CompressThread( ev.thread );
}
else
{
        // OpenGL doesn't need a per-zone thread id. It may still be sent,
        // because it can be needed for callstack collection purposes.
zone->thread = 0;
}
m_data.lastTime = std::max( m_data.lastTime, zone->cpuStart );
auto timeline = &ctx->timeline;
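    // Mirror the CPU-zone nesting scheme: top-level GPU zones go straight to
    // the context timeline, while nested ones are appended to the parent's
    // out-of-line children vector, allocated on first use.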
if( !ctx->stack.empty() )
{
auto back = ctx->stack.back();
if( back->child < 0 )
{
back->child = int32_t( m_data.m_gpuChildren.size() );
m_data.m_gpuChildren.push_back( Vector<GpuEvent*>() );
}
timeline = &m_data.m_gpuChildren[back->child];
}
timeline->push_back( zone );
ctx->stack.push_back( zone );
assert( !ctx->query[ev.queryId] );
ctx->query[ev.queryId] = zone;
}
void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev )
{
auto zone = m_slab.AllocInit<GpuEvent>();
ProcessGpuZoneBeginImpl( zone, ev );
}
void Worker::ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev )
{
auto zone = m_slab.AllocInit<GpuEvent>();
ProcessGpuZoneBeginImpl( zone, ev );
auto& next = m_nextCallstack[ev.thread];
next.type = NextCallstackType::Gpu;
next.gpu = zone;
}
void Worker::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev )
{
auto ctx = m_gpuCtxMap[ev.context];
assert( ctx );
assert( !ctx->stack.empty() );
auto zone = ctx->stack.back_and_pop();
assert( !ctx->query[ev.queryId] );
ctx->query[ev.queryId] = zone;
zone->cpuEnd = TscTime( ev.cpuTime );
m_data.lastTime = std::max( m_data.lastTime, zone->cpuEnd );
}
void Worker::ProcessGpuTime( const QueueGpuTime& ev )
{
auto ctx = m_gpuCtxMap[ev.context];
assert( ctx );
int64_t gpuTime;
if( ctx->period == 1.f )
{
gpuTime = ev.gpuTime;
}
else
{
gpuTime = int64_t( double( ctx->period ) * ev.gpuTime ); // precision loss
}
auto zone = ctx->query[ev.queryId];
assert( zone );
ctx->query[ev.queryId] = nullptr;
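    // Each query id reports twice: the first timestamp is the zone's GPU
    // start, the second its end. The swap below guards against timestamps
    // arriving out of order.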
if( zone->gpuStart == std::numeric_limits<int64_t>::max() )
{
zone->gpuStart = ctx->timeDiff + gpuTime;
m_data.lastTime = std::max( m_data.lastTime, zone->gpuStart );
ctx->count++;
}
else
{
zone->gpuEnd = ctx->timeDiff + gpuTime;
m_data.lastTime = std::max( m_data.lastTime, zone->gpuEnd );
if( zone->gpuEnd < zone->gpuStart )
{
std::swap( zone->gpuEnd, zone->gpuStart );
}
}
}
void Worker::ProcessMemAlloc( const QueueMemAlloc& ev )
{
const auto time = TscTime( ev.time );
assert( m_data.memory.active.find( ev.ptr ) == m_data.memory.active.end() );
assert( m_data.memory.data.empty() || m_data.memory.data.back().timeAlloc <= time );
m_data.memory.active.emplace( ev.ptr, m_data.memory.data.size() );
const auto ptr = ev.ptr;
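    // The allocation size arrives as a 48-bit value split into a 32-bit low
    // part and a 16-bit high part; reassemble it into 64 bits.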
uint32_t lo;
uint16_t hi;
memcpy( &lo, ev.size, 4 );
memcpy( &hi, ev.size+4, 2 );
const uint64_t size = lo | ( uint64_t( hi ) << 32 );
auto& mem = m_data.memory.data.push_next();
mem.ptr = ptr;
mem.size = size;
mem.timeAlloc = time;
mem.threadAlloc = CompressThread( ev.thread );
mem.timeFree = -1;
mem.threadFree = 0;
mem.csAlloc = 0;
mem.csFree = 0;
const auto low = m_data.memory.low;
const auto high = m_data.memory.high;
const auto ptrend = ptr + size;
m_data.memory.low = std::min( low, ptr );
m_data.memory.high = std::max( high, ptrend );
m_data.memory.usage += size;
MemAllocChanged( time );
}
bool Worker::ProcessMemFree( const QueueMemFree& ev )
{
const auto time = TscTime( ev.time );
auto it = m_data.memory.active.find( ev.ptr );
if( it == m_data.memory.active.end() )
{
assert( m_onDemand );
return false;
}
m_data.memory.frees.push_back( it->second );
auto& mem = m_data.memory.data[it->second];
mem.timeFree = time;
mem.threadFree = CompressThread( ev.thread );
m_data.memory.usage -= mem.size;
m_data.memory.active.erase( it );
MemAllocChanged( time );
return true;
}
void Worker::ProcessMemAllocCallstack( const QueueMemAlloc& ev )
{
m_lastMemActionCallstack = m_data.memory.data.size();
ProcessMemAlloc( ev );
m_lastMemActionWasAlloc = true;
}
void Worker::ProcessMemFreeCallstack( const QueueMemFree& ev )
{
if( ProcessMemFree( ev ) )
{
m_lastMemActionCallstack = m_data.memory.frees.back();
m_lastMemActionWasAlloc = false;
}
else
{
m_lastMemActionCallstack = std::numeric_limits<uint64_t>::max();
}
}
void Worker::ProcessCallstackMemory( const QueueCallstackMemory& ev )
{
auto it = m_pendingCallstacks.find( ev.ptr );
assert( it != m_pendingCallstacks.end() );
if( m_lastMemActionCallstack != std::numeric_limits<uint64_t>::max() )
{
auto& mem = m_data.memory.data[m_lastMemActionCallstack];
if( m_lastMemActionWasAlloc )
{
mem.csAlloc = it->second;
}
else
{
mem.csFree = it->second;
}
}
m_pendingCallstacks.erase( it );
}
void Worker::ProcessCallstack( const QueueCallstack& ev )
{
auto it = m_pendingCallstacks.find( ev.ptr );
assert( it != m_pendingCallstacks.end() );
auto nit = m_nextCallstack.find( ev.thread );
assert( nit != m_nextCallstack.end() );
auto& next = nit->second;
switch( next.type )
{
case NextCallstackType::Zone:
next.zone->callstack = it->second;
break;
case NextCallstackType::Gpu:
next.gpu->callstack = it->second;
break;
default:
assert( false );
break;
}
m_pendingCallstacks.erase( it );
}
void Worker::ProcessCallstackFrame( const QueueCallstackFrame& ev )
{
assert( m_pendingCallstackFrames > 0 );
m_pendingCallstackFrames--;
auto fmit = m_data.callstackFrameMap.find( ev.ptr );
auto nit = m_pendingCustomStrings.find( ev.name );
assert( nit != m_pendingCustomStrings.end() );
auto fit = m_pendingCustomStrings.find( ev.file );
assert( fit != m_pendingCustomStrings.end() );
// Frames may be duplicated due to recursion
if( fmit == m_data.callstackFrameMap.end() )
{
CheckString( ev.file );
auto frame = m_slab.Alloc<CallstackFrame>();
frame->name = StringIdx( nit->second.idx );
frame->file = StringIdx( fit->second.idx );
frame->line = ev.line;
m_data.callstackFrameMap.emplace( ev.ptr, frame );
}
m_pendingCustomStrings.erase( nit );
m_pendingCustomStrings.erase( m_pendingCustomStrings.find( ev.file ) );
}
void Worker::MemAllocChanged( int64_t time )
{
const auto val = (double)m_data.memory.usage;
if( !m_data.memory.plot )
{
CreateMemAllocPlot();
m_data.memory.plot->min = val;
m_data.memory.plot->max = val;
m_data.memory.plot->data.push_back( { time, val } );
}
else
{
assert( !m_data.memory.plot->data.empty() );
assert( m_data.memory.plot->data.back().time <= time );
if( m_data.memory.plot->min > val ) m_data.memory.plot->min = val;
else if( m_data.memory.plot->max < val ) m_data.memory.plot->max = val;
m_data.memory.plot->data.push_back_non_empty( { time, val } );
}
}
void Worker::CreateMemAllocPlot()
{
assert( !m_data.memory.plot );
m_data.memory.plot = m_slab.AllocInit<PlotData>();
m_data.memory.plot->name = 0;
m_data.memory.plot->type = PlotType::Memory;
m_data.memory.plot->data.push_back( { GetFrameBegin( *m_data.framesBase, 0 ), 0. } );
m_data.plots.Data().push_back( m_data.memory.plot );
}
void Worker::ReconstructMemAllocPlot()
{
auto& mem = m_data.memory;
#ifdef MY_LIBCPP_SUCKS
pdqsort_branchless( mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].timeFree < mem.data[rhs].timeFree; } );
#else
std::sort( std::execution::par_unseq, mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].timeFree < mem.data[rhs].timeFree; } );
#endif
const auto psz = mem.data.size() + mem.frees.size() + 1;
PlotData* plot;
{
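        // TracyMutex is the project's mutex choice; per the commit notes it
        // was selected as the fastest available option based on contention
        // benchmarks of NonRecursiveBenaphore, std::mutex,
        // std::shared_timed_mutex and std::shared_mutex across MSVC,
        // Cygwin/clang and Linux (x64/arm64).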
std::lock_guard<TracyMutex> lock( m_data.lock );
plot = m_slab.AllocInit<PlotData>();
}
plot->name = 0;
plot->type = PlotType::Memory;
plot->data.reserve_and_use( psz );
auto aptr = mem.data.begin();
auto aend = mem.data.end();
auto fptr = mem.frees.begin();
auto fend = mem.frees.end();
double max = 0;
double usage = 0;
auto ptr = plot->data.data();
ptr->time = GetFrameBegin( *m_data.framesBase, 0 );
ptr->val = 0;
ptr++;
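    // Two-pointer merge of the allocation stream (sorted by timeAlloc) and
    // the free stream (just sorted by timeFree above), emitting a running
    // memory usage sample at every event.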
if( aptr != aend && fptr != fend )
{
auto atime = aptr->timeAlloc;
auto ftime = mem.data[*fptr].timeFree;
for(;;)
{
if( atime < ftime )
{
usage += int64_t( aptr->size );
assert( usage >= 0 );
if( max < usage ) max = usage;
ptr->time = atime;
ptr->val = usage;
ptr++;
aptr++;
if( aptr == aend ) break;
atime = aptr->timeAlloc;
}
else
{
usage -= int64_t( mem.data[*fptr].size );
assert( usage >= 0 );
if( max < usage ) max = usage;
ptr->time = ftime;
ptr->val = usage;
ptr++;
fptr++;
if( fptr == fend ) break;
ftime = mem.data[*fptr].timeFree;
}
}
}
while( aptr != aend )
{
assert( aptr->timeFree < 0 );
int64_t time = aptr->timeAlloc;
usage += int64_t( aptr->size );
assert( usage >= 0 );
if( max < usage ) max = usage;
ptr->time = time;
ptr->val = usage;
ptr++;
aptr++;
}
while( fptr != fend )
{
int64_t time = mem.data[*fptr].timeFree;
usage -= int64_t( mem.data[*fptr].size );
assert( usage >= 0 );
assert( max >= usage );
ptr->time = time;
ptr->val = usage;
ptr++;
fptr++;
}
plot->min = 0;
plot->max = max;
std::lock_guard<TracyMutex> lock( m_data.lock );
2018-08-04 14:33:03 +00:00
m_data.plots.Data().insert( m_data.plots.Data().begin(), plot );
m_data.memory.plot = plot;
}
void Worker::ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread )
{
uint64_t sz;
f.Read( sz );
if( sz == 0 )
{
zone->child = -1;
}
else
{
zone->child = m_data.m_zoneChildren.size();
        // Add a placeholder so that nested calls see the proper number of
        // zone-children vectors.
        m_data.m_zoneChildren.push_back( Vector<ZoneEvent*>() );
        // Read into a local buffer instead of the placeholder: the vector
        // may be reallocated during the nested read, which would move the
        // buffer while a reference into it kept pointing at the old address.
Vector<ZoneEvent*> tmp;
ReadTimeline( f, tmp, thread, sz );
m_data.m_zoneChildren[zone->child] = std::move( tmp );
}
}
void Worker::ReadTimelinePre033( FileRead& f, ZoneEvent* zone, uint16_t thread, int fileVer )
{
uint64_t sz;
f.Read( sz );
if( sz == 0 )
{
zone->child = -1;
}
else
{
zone->child = m_data.m_zoneChildren.size();
m_data.m_zoneChildren.push_back( Vector<ZoneEvent*>() );
Vector<ZoneEvent*> tmp;
ReadTimelinePre033( f, tmp, thread, sz, fileVer );
m_data.m_zoneChildren[zone->child] = std::move( tmp );
}
}
void Worker::ReadTimeline( FileRead& f, GpuEvent* zone )
{
uint64_t sz;
f.Read( sz );
if( sz == 0 )
{
zone->child = -1;
}
else
{
zone->child = m_data.m_gpuChildren.size();
m_data.m_gpuChildren.push_back( Vector<GpuEvent*>() );
Vector<GpuEvent*> tmp;
ReadTimeline( f, tmp, sz );
m_data.m_gpuChildren[zone->child] = std::move( tmp );
}
}
void Worker::ReadTimelinePre032( FileRead& f, GpuEvent* zone )
{
uint64_t sz;
f.Read( sz );
if( sz == 0 )
{
zone->child = -1;
}
else
{
zone->child = m_data.m_gpuChildren.size();
m_data.m_gpuChildren.push_back( Vector<GpuEvent*>() );
Vector<GpuEvent*> tmp;
ReadTimelinePre032( f, tmp, sz );
m_data.m_gpuChildren[zone->child] = std::move( tmp );
}
}
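// Registers a newly loaded zone in the per-source-location statistics (zone
// list, min/max/total and self times). When statistics are compiled out with
// TRACY_NO_STATISTICS, only a per-location zone count is maintained.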
void Worker::ReadTimelineUpdateStatistics( ZoneEvent* zone, uint16_t thread )
{
#ifndef TRACY_NO_STATISTICS
auto it = m_data.sourceLocationZones.find( zone->srcloc );
assert( it != m_data.sourceLocationZones.end() );
auto& ztd = it->second.zones.push_next();
ztd.zone = zone;
ztd.thread = thread;
if( zone->end >= 0 )
{
auto timeSpan = zone->end - zone->start;
if( timeSpan > 0 )
{
it->second.min = std::min( it->second.min, timeSpan );
it->second.max = std::max( it->second.max, timeSpan );
it->second.total += timeSpan;
if( zone->child >= 0 )
{
for( auto& v : GetZoneChildren( zone->child ) )
{
const auto childSpan = std::max( int64_t( 0 ), v->end - v->start );
timeSpan -= childSpan;
}
}
it->second.selfTotal += timeSpan;
}
}
#else
auto it = m_data.sourceLocationZonesCnt.find( zone->srcloc );
assert( it != m_data.sourceLocationZonesCnt.end() );
it->second++;
#endif
}
void Worker::ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size )
{
assert( size != 0 );
vec.reserve_non_zero( size );
m_data.zonesCnt += size;
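    // Capacity was reserved above, so the unchecked push_back in the loop below is safe.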
for( uint64_t i=0; i<size; i++ )
{
s_loadProgress.subProgress.fetch_add( 1, std::memory_order_relaxed );
auto zone = m_slab.Alloc<ZoneEvent>();
vec.push_back_no_space_check( zone );
f.Read( zone, sizeof( ZoneEvent ) - sizeof( ZoneEvent::child ) );
ReadTimeline( f, zone, thread );
ReadTimelineUpdateStatistics( zone, thread );
}
}
void Worker::ReadTimelinePre033( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int fileVer )
{
assert( size != 0 );
vec.reserve_non_zero( size );
m_data.zonesCnt += size;
for( uint64_t i=0; i<size; i++ )
{
s_loadProgress.subProgress.fetch_add( 1, std::memory_order_relaxed );
auto zone = m_slab.Alloc<ZoneEvent>();
vec.push_back_no_space_check( zone );
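        // Zones in old traces are shorter than the current ZoneEvent: 0.3.1
        // and earlier store 26 bytes (no callstack, no name), 0.3.2 stores
        // 30 bytes (no name). The missing fields are zero-initialized.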
if( fileVer <= FileVersion( 0, 3, 1 ) )
{
f.Read( zone, 26 );
zone->callstack = 0;
zone->name.__data = 0;
}
else
{
assert( fileVer <= FileVersion( 0, 3, 2 ) );
f.Read( zone, 30 );
zone->name.__data = 0;
}
ReadTimelinePre033( f, zone, thread, fileVer );
ReadTimelineUpdateStatistics( zone, thread );
}
}
void Worker::ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size )
{
assert( size != 0 );
vec.reserve_non_zero( size );
for( uint64_t i=0; i<size; i++ )
{
s_loadProgress.subProgress.fetch_add( 1, std::memory_order_relaxed );
auto zone = m_slab.AllocInit<GpuEvent>();
vec.push_back_no_space_check( zone );
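        // Read only the fields that are stored in the file; thread and child are filled in separately below.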
f.Read( zone, sizeof( GpuEvent::cpuStart ) + sizeof( GpuEvent::cpuEnd ) + sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) + sizeof( GpuEvent::srcloc ) + sizeof( GpuEvent::callstack ) );
uint64_t thread;
f.Read( thread );
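        // A zero thread id is stored verbatim; nonzero ids are remapped through the thread compression table.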
if( thread == 0 )
{
zone->thread = 0;
}
else
{
zone->thread = CompressThread( thread );
}
ReadTimeline( f, zone );
}
}
void Worker::ReadTimelinePre032( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size )
{
assert( size != 0 );
vec.reserve_non_zero( size );
for( uint64_t i=0; i<size; i++ )
{
s_loadProgress.subProgress.fetch_add( 1, std::memory_order_relaxed );
auto zone = m_slab.AllocInit<GpuEvent>();
vec.push_back_no_space_check( zone );
f.Read( zone, 36 );
zone->thread = 0;
zone->callstack = 0;
ReadTimelinePre032( f, zone );
}
}
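// Serializes the whole capture to disk. Data is written in the order in which
// the loading code reads it back: file header and timer metadata, capture
// name, frames, string tables, source locations, zone statistics, locks,
// messages, per-thread zone timelines, GPU timelines, user plots, memory
// events and, finally, callstack data.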
void Worker::Write( FileWrite& f )
{
f.Write( FileHeader, sizeof( FileHeader ) );
f.Write( &m_delay, sizeof( m_delay ) );
f.Write( &m_resolution, sizeof( m_resolution ) );
f.Write( &m_timerMul, sizeof( m_timerMul ) );
f.Write( &m_data.lastTime, sizeof( m_data.lastTime ) );
f.Write( &m_data.frameOffset, sizeof( m_data.frameOffset ) );
uint64_t sz = m_captureName.size();
f.Write( &sz, sizeof( sz ) );
f.Write( m_captureName.c_str(), sz );
sz = m_data.frames.Data().size();
f.Write( &sz, sizeof( sz ) );
for( auto& fd : m_data.frames.Data() )
{
f.Write( &fd->name, sizeof( fd->name ) );
f.Write( &fd->continuous, sizeof( fd->continuous ) );
sz = fd->frames.size();
f.Write( &sz, sizeof( sz ) );
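        // Continuous frame sets serialize only the start time of each frame; discontinuous sets write full FrameEvent records.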
if( fd->continuous )
{
for( auto& fe : fd->frames )
{
f.Write( &fe.start, sizeof( fe.start ) );
}
}
else
{
f.Write( fd->frames.data(), sizeof( FrameEvent ) * sz );
}
}
sz = m_data.stringData.size();
f.Write( &sz, sizeof( sz ) );
for( auto& v : m_data.stringData )
{
uint64_t ptr = (uint64_t)v;
f.Write( &ptr, sizeof( ptr ) );
sz = strlen( v );
f.Write( &sz, sizeof( sz ) );
f.Write( v, sz );
}
sz = m_data.strings.size();
f.Write( &sz, sizeof( sz ) );
for( auto& v : m_data.strings )
{
f.Write( &v.first, sizeof( v.first ) );
uint64_t ptr = (uint64_t)v.second;
f.Write( &ptr, sizeof( ptr ) );
}
sz = m_data.threadNames.size();
f.Write( &sz, sizeof( sz ) );
for( auto& v : m_data.threadNames )
{
f.Write( &v.first, sizeof( v.first ) );
uint64_t ptr = (uint64_t)v.second;
f.Write( &ptr, sizeof( ptr ) );
}
sz = m_data.threadExpand.size();
f.Write( &sz, sizeof( sz ) );
sz = m_data.sourceLocation.size();
f.Write( &sz, sizeof( sz ) );
for( auto& v : m_data.sourceLocation )
{
f.Write( &v.first, sizeof( v.first ) );
f.Write( &v.second, sizeof( v.second ) );
}
sz = m_data.sourceLocationExpand.size();
f.Write( &sz, sizeof( sz ) );
for( auto& v : m_data.sourceLocationExpand )
{
f.Write( &v, sizeof( v ) );
}
sz = m_data.sourceLocationPayload.size();
f.Write( &sz, sizeof( sz ) );
for( auto& v : m_data.sourceLocationPayload )
{
f.Write( v, sizeof( *v ) );
}
#ifndef TRACY_NO_STATISTICS
sz = m_data.sourceLocationZones.size();
f.Write( &sz, sizeof( sz ) );
for( auto& v : m_data.sourceLocationZones )
{
int32_t id = v.first;
uint64_t cnt = v.second.zones.size();
f.Write( &id, sizeof( id ) );
f.Write( &cnt, sizeof( cnt ) );
}
#else
sz = m_data.sourceLocationZonesCnt.size();
f.Write( &sz, sizeof( sz ) );
for( auto& v : m_data.sourceLocationZonesCnt )
{
int32_t id = v.first;
uint64_t cnt = v.second;
f.Write( &id, sizeof( id ) );
f.Write( &cnt, sizeof( cnt ) );
}
#endif
sz = m_data.lockMap.size();
f.Write( &sz, sizeof( sz ) );
for( auto& v : m_data.lockMap )
{
f.Write( &v.first, sizeof( v.first ) );
f.Write( &v.second.srcloc, sizeof( v.second.srcloc ) );
f.Write( &v.second.type, sizeof( v.second.type ) );
f.Write( &v.second.valid, sizeof( v.second.valid ) );
sz = v.second.threadList.size();
f.Write( &sz, sizeof( sz ) );
for( auto& t : v.second.threadList )
{
f.Write( &t, sizeof( t ) );
}
sz = v.second.timeline.size();
f.Write( &sz, sizeof( sz ) );
for( auto& lev : v.second.timeline )
{
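            // Only the raw event fields (time, srcloc, thread, type) are
            // written; the derived lock state is recomputed on load.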
f.Write( lev, sizeof( LockEvent::time ) + sizeof( LockEvent::srcloc ) + sizeof( LockEvent::thread ) + sizeof( LockEvent::type ) );
}
}
sz = m_data.messages.size();
f.Write( &sz, sizeof( sz ) );
for( auto& v : m_data.messages )
{
const auto ptr = (uint64_t)v;
f.Write( &ptr, sizeof( ptr ) );
f.Write( v, sizeof( MessageData::time ) + sizeof( MessageData::ref ) );
}
sz = m_data.threads.size();
f.Write( &sz, sizeof( sz ) );
for( auto& thread : m_data.threads )
{
f.Write( &thread->id, sizeof( thread->id ) );
f.Write( &thread->count, sizeof( thread->count ) );
WriteTimeline( f, thread->timeline );
sz = thread->messages.size();
f.Write( &sz, sizeof( sz ) );
for( auto& v : thread->messages )
{
auto ptr = uint64_t( v );
f.Write( &ptr, sizeof( ptr ) );
}
}
sz = m_data.gpuData.size();
f.Write( &sz, sizeof( sz ) );
for( auto& ctx : m_data.gpuData )
{
f.Write( &ctx->thread, sizeof( ctx->thread ) );
f.Write( &ctx->accuracyBits, sizeof( ctx->accuracyBits ) );
f.Write( &ctx->count, sizeof( ctx->count ) );
f.Write( &ctx->period, sizeof( ctx->period ) );
WriteTimeline( f, ctx->timeline );
}
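    // Only user-provided plots are serialized; built-in plot types are skipped and the count is adjusted to match.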
sz = m_data.plots.Data().size();
for( auto& plot : m_data.plots.Data() ) { if( plot->type != PlotType::User ) sz--; }
f.Write( &sz, sizeof( sz ) );
for( auto& plot : m_data.plots.Data() )
{
if( plot->type != PlotType::User ) continue;
f.Write( &plot->name, sizeof( plot->name ) );
f.Write( &plot->min, sizeof( plot->min ) );
f.Write( &plot->max, sizeof( plot->max ) );
sz = plot->data.size();
f.Write( &sz, sizeof( sz ) );
f.Write( plot->data.data(), sizeof( PlotItem ) * sz );
}
sz = m_data.memory.data.size();
f.Write( &sz, sizeof( sz ) );
sz = m_data.memory.active.size();
f.Write( &sz, sizeof( sz ) );
sz = m_data.memory.frees.size();
f.Write( &sz, sizeof( sz ) );
for( auto& mem : m_data.memory.data )
{
f.Write( &mem, sizeof( MemEvent::ptr ) + sizeof( MemEvent::size ) + sizeof( MemEvent::timeAlloc ) + sizeof( MemEvent::timeFree ) + sizeof( MemEvent::csAlloc ) + sizeof( MemEvent::csFree ) );
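        // Thread ids are kept compressed in memory; the expanded forms are written to the file.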
uint64_t t[2];
t[0] = DecompressThread( mem.threadAlloc );
t[1] = DecompressThread( mem.threadFree );
f.Write( &t, sizeof( t ) );
}
f.Write( &m_data.memory.high, sizeof( m_data.memory.high ) );
f.Write( &m_data.memory.low, sizeof( m_data.memory.low ) );
f.Write( &m_data.memory.usage, sizeof( m_data.memory.usage ) );
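    // Entry 0 of the callstack payload is never written: serialization starts at index 1 and the stored count excludes it.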
sz = m_data.callstackPayload.size() - 1;
f.Write( &sz, sizeof( sz ) );
for( size_t i=1; i<=sz; i++ )
{
auto cs = m_data.callstackPayload[i];
uint8_t csz = cs->size();
f.Write( &csz, sizeof( csz ) );
f.Write( cs->data(), sizeof( uint64_t ) * csz );
}
sz = m_data.callstackFrameMap.size();
f.Write( &sz, sizeof( sz ) );
for( auto& frame : m_data.callstackFrameMap )
{
f.Write( &frame.first, sizeof( uint64_t ) );
f.Write( frame.second, sizeof( CallstackFrame ) );
}
}
void Worker::WriteTimeline( FileWrite& f, const Vector<ZoneEvent*>& vec )
{
uint64_t sz = vec.size();
f.Write( &sz, sizeof( sz ) );
for( auto& v : vec )
{
f.Write( v, sizeof( ZoneEvent ) - sizeof( ZoneEvent::child ) );
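        // The child field is a load-time index and is not serialized; child
        // timelines are written recursively below instead.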
if( v->child < 0 )
{
sz = 0;
f.Write( &sz, sizeof( sz ) );
}
else
{
WriteTimeline( f, GetZoneChildren( v->child ) );
}
}
}
void Worker::WriteTimeline( FileWrite& f, const Vector<GpuEvent*>& vec )
{
uint64_t sz = vec.size();
f.Write( &sz, sizeof( sz ) );
for( auto& v : vec )
{
f.Write( v, sizeof( GpuEvent::cpuStart ) + sizeof( GpuEvent::cpuEnd ) + sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) + sizeof( GpuEvent::srcloc ) + sizeof( GpuEvent::callstack ) );
uint64_t thread = DecompressThread( v->thread );
f.Write( &thread, sizeof( thread ) );
if( v->child < 0 )
{
sz = 0;
f.Write( &sz, sizeof( sz ) );
}
else
{
WriteTimeline( f, GetGpuChildren( v->child ) );
}
}
}
}