tracy/client/TracyProfiler.cpp
Bartosz Taudul 1ea61c2f2c Use LZ4 to compress network data.
This greatly reduces the required network bandwidth, which in turn speeds
up queue processing.

Time to process a single event queue item:

Stage | Raw data | With LZ4 |
------+----------+----------+
Deque |  6.86 ns |   6.7 ns |
Pack  |  4.03 ns |   4.0 ns |
LZ4   |      --- |  21.6 ns |
Send  | 214.5 ns |   5.2 ns |
------+----------+----------+
Total | 225.4 ns | 37.58 ns |
2017-09-12 02:13:22 +02:00
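
Each frame on the wire is a uint16_t compressed-size prefix followed by the
LZ4 block; on connect, the client first sends its start timestamp as a raw
int64_t. A minimal sketch of how the server side could decode one frame,
assuming a ReadFully() helper that blocks until the requested number of bytes
has arrived (the helper and buffer names are assumptions, not part of this
file):

    // Assumed helper: blocks until len bytes have been read from the socket.
    // void ReadFully( void* buf, size_t len );

    enum { TargetFrameSize = 64000 };
    char lz4buf[LZ4_COMPRESSBOUND( TargetFrameSize )];
    char frame[TargetFrameSize];

    uint16_t lz4sz;
    ReadFully( &lz4sz, sizeof( lz4sz ) );   // 2-byte compressed-size prefix
    ReadFully( lz4buf, lz4sz );             // compressed frame body
    const auto sz = LZ4_decompress_safe( lz4buf, frame, lz4sz, TargetFrameSize );
    // 'frame' now holds 'sz' bytes of tightly packed queue items.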

#include <assert.h>
#include <chrono>
#include <limits>
#include <memory>
#include <string.h>
#include <thread>

#include "../common/tracy_lz4.hpp"
#include "../common/TracySocket.hpp"
#include "TracyProfiler.hpp"
#include "TracySystem.hpp"

namespace tracy
{

extern const char* PointerCheckA;
const char* PointerCheckB = "tracy";

static inline int64_t GetTime()
{
    return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
}

static Profiler* s_instance = nullptr;

Profiler::Profiler()
    : m_timeBegin( GetTime() )
    , m_shutdown( false )
    , m_id( 0 )
{
    assert( PointerCheckA == PointerCheckB );
    assert( !s_instance );
    s_instance = this;

    m_thread = std::thread( [this] { Worker(); } );
    SetThreadName( m_thread, "Tracy Profiler" );
}

Profiler::~Profiler()
{
    assert( s_instance );
    s_instance = nullptr;

    m_shutdown.store( true, std::memory_order_relaxed );
    m_thread.join();
}

uint64_t Profiler::GetNewId()
{
    return s_instance->m_id.fetch_add( 1, std::memory_order_relaxed );
}

void Profiler::ZoneBegin( QueueZoneBegin&& data )
{
    QueueItem item;
    item.hdr.type = QueueType::ZoneBegin;
    item.hdr.time = GetTime();
    item.zoneBegin = std::move( data );
    s_instance->m_queue.enqueue( GetToken(), std::move( item ) );
}

void Profiler::ZoneEnd( QueueZoneEnd&& data )
{
    QueueItem item;
    item.hdr.type = QueueType::ZoneEnd;
    item.hdr.time = GetTime();
    item.zoneEnd = std::move( data );
    s_instance->m_queue.enqueue( GetToken(), std::move( item ) );
}

Profiler* Profiler::Instance()
{
    return s_instance;
}
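
// Worker runs on the profiler thread: it accepts a server connection, sends
// the initial timestamp, then repeatedly drains the event queue in bulk,
// packs the items into frames of at most TargetFrameSize bytes, and sends
// each frame LZ4-compressed, prefixed with its compressed size.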
void Profiler::Worker()
{
    enum { TargetFrameSize = 64000 };
    enum { BulkSize = TargetFrameSize / QueueItemSize };
    enum { LZ4Size = LZ4_COMPRESSBOUND( TargetFrameSize ) };
    static_assert( LZ4Size <= std::numeric_limits<uint16_t>::max(), "LZ4Size greater than uint16_t" );

    moodycamel::ConsumerToken token( m_queue );

    ListenSocket listen;
    listen.Listen( "8086", 8 );

    for(;;)
    {
        // Wait for a server connection, bailing out if shutdown was requested.
        std::unique_ptr<Socket> sock;
        for(;;)
        {
            if( m_shutdown.load( std::memory_order_relaxed ) ) return;
            sock = listen.Accept();
            if( sock ) break;
        }

        // Handshake: send the initial timestamp taken in the constructor.
        sock->Send( &m_timeBegin, sizeof( m_timeBegin ) );

        for(;;)
        {
            if( m_shutdown.load( std::memory_order_relaxed ) ) return;

            QueueItem item[BulkSize];
            const auto sz = m_queue.try_dequeue_bulk( token, item, BulkSize );
            if( sz > 0 )
            {
                // Pack the dequeued items tightly, using only the size that
                // each event type actually needs.
                char buf[TargetFrameSize];
                char* ptr = buf;
                for( size_t i=0; i<sz; i++ )
                {
                    const auto dsz = QueueDataSize[(uint8_t)item[i].hdr.type];
                    memcpy( ptr, item+i, dsz );
                    ptr += dsz;
                }

                // Compress the frame and prepend its compressed size, so the
                // receiver knows how many bytes to expect. The size prefix is
                // included in the byte count passed to Send().
                char lz4[LZ4Size + sizeof( uint16_t )];
                const uint16_t lz4sz = LZ4_compress_default( buf, lz4 + sizeof( uint16_t ), ptr - buf, LZ4Size );
                memcpy( lz4, &lz4sz, sizeof( uint16_t ) );
                if( sock->Send( lz4, lz4sz + sizeof( uint16_t ) ) == -1 ) break;
            }
            else
            {
                // Nothing queued; avoid busy-waiting.
                std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
            }
        }
    }
}

}