Store source location in a single object.

Source file, function name and line number are now stored in a const
static container object. This has the following benefits:
- Slightly lighter profiling workload (3 fewer instructions).
- Profiling queue event size is significantly reduced, by 12 bytes. This
  has an effect on all queue event types.
- Source location grouping now has no cost, as it's performed at the
  compilation stage. This allows simplification of the server code.
The downside is that full source location resolution is now performed
in two steps, as the server has to query both the source location
container and the strings contained within it. This has almost no real
impact on profiler operation.
This commit is contained in:
Bartosz Taudul 2017-09-26 02:28:14 +02:00
parent 9cb12a05b3
commit 7424077d70
10 changed files with 103 additions and 72 deletions

View File

@ -13,8 +13,8 @@
#include "TracyProfiler.hpp"
#include "TracyScoped.hpp"
#define ZoneScoped tracy::ScopedZone ___tracy_scoped_zone( __FILE__, __FUNCTION__, __LINE__, 0 );
#define ZoneScopedC( color ) tracy::ScopedZone ___tracy_scoped_zone( __FILE__, __FUNCTION__, __LINE__, color );
#define ZoneScoped static const tracy::SourceLocation __tracy_source_location { __FUNCTION__, __FILE__, __LINE__ }; tracy::ScopedZone ___tracy_scoped_zone( &__tracy_source_location, 0 );
#define ZoneScopedC( color ) static const tracy::SourceLocation __tracy_source_location { __FUNCTION__, __FILE__, __LINE__ }; tracy::ScopedZone ___tracy_scoped_zone( &__tracy_source_location, color );
#define FrameMark tracy::Profiler::FrameMark();

View File

@ -216,6 +216,27 @@ bool Profiler::SendString( uint64_t str, const char* ptr, QueueType type )
return SendData( buf, sizeof( hdr ) + sizeof( l16 ) + l16 );
}
bool Profiler::SendSourceLocation( uint64_t ptr )
{
auto srcloc = (const SourceLocation*)ptr;
QueueItem item;
item.hdr.type = QueueType::SourceLocation;
item.hdr.id = ptr;
item.srcloc.file = (uint64_t)srcloc->file;
item.srcloc.function = (uint64_t)srcloc->function;
item.srcloc.line = srcloc->line;
const auto sz = QueueDataSize[item.hdr.idx];
auto buf = m_buffer + m_bufferOffset;
memcpy( buf, &item, sz );
m_bufferOffset += sz;
if( m_bufferOffset > TargetFrameSize * 2 ) m_bufferOffset = 0;
return SendData( buf, sz );
}
bool Profiler::HandleServerQuery()
{
timeval tv;
@ -243,6 +264,9 @@ bool Profiler::HandleServerQuery()
SendString( ptr, GetThreadName( ptr ), QueueType::ThreadName );
}
break;
case ServerQuerySourceLocation:
SendSourceLocation( ptr );
break;
default:
assert( false );
break;
@ -277,7 +301,7 @@ void Profiler::CalibrateTimer()
class FakeZone
{
public:
FakeZone( const char* file, const char* function, uint32_t line, uint32_t color ) {}
FakeZone( const SourceLocation* srcloc, uint32_t color ) {}
~FakeZone() {}
private:
@ -291,17 +315,20 @@ void Profiler::CalibrateDelay()
static_assert( Events * 2 < QueuePrealloc, "Delay calibration loop will allocate memory in queue" );
for( int i=0; i<Iterations; i++ )
{
ScopedZone ___tracy_scoped_zone( __FILE__, __FUNCTION__, __LINE__, 0 );
static const tracy::SourceLocation __tracy_source_location { __FUNCTION__, __FILE__, __LINE__ };
ScopedZone ___tracy_scoped_zone( &__tracy_source_location, 0 );
}
const auto f0 = GetTime();
for( int i=0; i<Iterations; i++ )
{
FakeZone ___tracy_scoped_zone( __FILE__, __FUNCTION__, __LINE__, 0 );
static const tracy::SourceLocation __tracy_source_location { __FUNCTION__, __FILE__, __LINE__ };
FakeZone ___tracy_scoped_zone( &__tracy_source_location, 0 );
}
const auto t0 = GetTime();
for( int i=0; i<Iterations; i++ )
{
ScopedZone ___tracy_scoped_zone( __FILE__, __FUNCTION__, __LINE__, 0 );
static const tracy::SourceLocation __tracy_source_location { __FUNCTION__, __FILE__, __LINE__ };
ScopedZone ___tracy_scoped_zone( &__tracy_source_location, 0 );
}
const auto t1 = GetTime();
const auto dt = t1 - t0;

View File

@ -18,6 +18,13 @@ namespace tracy
class Socket;
// Describes the place in the source code where a profiled zone was declared.
// Instances are created as `static const` objects by aggregate initialization
// in the order { function, file, line } (e.g. { __FUNCTION__, __FILE__,
// __LINE__ }), so the member order below must not be changed. The address of
// such an object is what gets passed to ScopedZone and sent in zone events.
struct SourceLocation
{
const char* function;   // name of the enclosing function
const char* file;       // source file path
uint32_t line;          // line number within `file`
};
class Profiler
{
public:
@ -46,6 +53,7 @@ private:
bool SendData( const char* data, size_t len );
bool SendString( uint64_t ptr, const char* str, QueueType type );
bool SendSourceLocation( uint64_t ptr );
bool HandleServerQuery();

View File

@ -12,8 +12,8 @@ namespace tracy
class ScopedZone
{
public:
ScopedZone( const char* file, const char* function, uint32_t line, uint32_t color )
: m_id( Profiler::ZoneBegin( QueueZoneBegin { Profiler::GetTime(), (uint64_t)file, (uint64_t)function, line, GetThreadHandle(), color } ) )
ScopedZone( const SourceLocation* srcloc, uint32_t color )
: m_id( Profiler::ZoneBegin( QueueZoneBegin { Profiler::GetTime(), (uint64_t)srcloc, GetThreadHandle(), color } ) )
{
}

View File

@ -19,7 +19,8 @@ static_assert( TargetFrameSize * 2 >= 64 * 1024, "Not enough space for LZ4 strea
enum ServerQuery : uint8_t
{
ServerQueryString,
ServerQueryThreadString
ServerQueryThreadString,
ServerQuerySourceLocation
};
#pragma pack( 1 )

View File

@ -13,6 +13,7 @@ enum class QueueType : uint8_t
StringData,
ThreadName,
FrameMark,
SourceLocation,
NUM_TYPES
};
@ -21,9 +22,7 @@ enum class QueueType : uint8_t
struct QueueZoneBegin
{
int64_t time;
uint64_t filename; // ptr
uint64_t function; // ptr
uint32_t line;
uint64_t srcloc; // ptr
uint64_t thread;
uint32_t color;
};
@ -33,6 +32,13 @@ struct QueueZoneEnd
int64_t time;
};
// Payload of a QueueType::SourceLocation queue item.
// Profiler::SendSourceLocation fills it from a client-side SourceLocation
// object. The two string members carry raw client pointer values, not the
// text itself; the server passes them to View::CheckString.
struct QueueSourceLocation
{
uint64_t function; // ptr
uint64_t file; // ptr
uint32_t line;
};
struct QueueHeader
{
union
@ -50,6 +56,7 @@ struct QueueItem
{
QueueZoneBegin zoneBegin;
QueueZoneEnd zoneEnd;
QueueSourceLocation srcloc;
};
};
@ -63,6 +70,7 @@ static const size_t QueueDataSize[] = {
sizeof( QueueHeader ),
sizeof( QueueHeader ),
sizeof( QueueHeader ),
sizeof( QueueHeader ) + sizeof( QueueSourceLocation ),
};
static_assert( sizeof( QueueDataSize ) / sizeof( size_t ) == (uint8_t)QueueType::NUM_TYPES, "QueueDataSize mismatch" );

View File

@ -10,7 +10,7 @@ struct Event
{
int64_t start;
int64_t end;
uint32_t srcloc;
uint64_t srcloc;
uint32_t color;
Event* parent;

View File

@ -1,37 +0,0 @@
#ifndef __TRACYSOURCELOCATION_HPP__
#define __TRACYSOURCELOCATION_HPP__
#include <functional>
#include <stdint.h>
#include <string.h>
namespace tracy
{
// Key identifying a unique source location: the (filename, function, line)
// triple, where filename and function are 64-bit string identifiers.
// Hasher and Comparator allow it to be used as the key of an unordered
// container.
struct SourceLocation
{
    uint64_t filename;
    uint64_t function;
    uint32_t line;

    // Hash functor. Fields are mixed in an order-sensitive way, so that
    // swapping filename and function yields a different hash and identical
    // filename/function values do not cancel each other out (as a plain XOR
    // of the individual hashes would).
    struct Hasher
    {
        size_t operator()( const SourceLocation& v ) const
        {
            const std::hash<uint64_t> hash{};
            size_t seed = hash( v.filename );
            seed ^= hash( v.function ) + 0x9e3779b9 + ( seed << 6 ) + ( seed >> 2 );
            seed ^= hash( v.line ) + 0x9e3779b9 + ( seed << 6 ) + ( seed >> 2 );
            return seed;
        }
    };

    // Equality functor. Members are compared one by one: the struct has
    // padding bytes after `line` whose contents are indeterminate, so a
    // byte-wise memcmp of the whole object could report two equal locations
    // as different.
    struct Comparator
    {
        bool operator()( const SourceLocation& lhs, const SourceLocation& rhs ) const
        {
            return lhs.filename == rhs.filename
                && lhs.function == rhs.function
                && lhs.line == rhs.line;
        }
    };
};
}
#endif

View File

@ -219,6 +219,9 @@ void View::Process( const QueueItem& ev )
case QueueType::FrameMark:
ProcessFrameMark( ev.hdr.id );
break;
case QueueType::SourceLocation:
AddSourceLocation( ev.hdr.id, ev.srcloc );
break;
default:
assert( false );
break;
@ -230,27 +233,14 @@ void View::ProcessZoneBegin( uint64_t id, const QueueZoneBegin& ev )
auto it = m_pendingEndZone.find( id );
auto zone = m_slab.Alloc<Event>();
CheckString( ev.filename );
CheckString( ev.function );
CheckSourceLocation( ev.srcloc );
CheckThreadString( ev.thread );
zone->start = ev.time * m_timerMul;
zone->srcloc = ev.srcloc;
zone->color = ev.color;
SourceLocation srcloc { ev.filename, ev.function, ev.line };
auto lit = m_locationRef.find( srcloc );
std::unique_lock<std::mutex> lock( m_lock );
if( lit == m_locationRef.end() )
{
const auto ref = uint32_t( m_srcFile.size() );
zone->srcloc = ref;
m_locationRef.emplace( srcloc, ref );
m_srcFile.push_back( srcloc );
}
else
{
zone->srcloc = lit->second;
}
if( it == m_pendingEndZone.end() )
{
@ -331,6 +321,18 @@ void View::CheckThreadString( uint64_t id )
m_sock.Send( &id, sizeof( id ) );
}
// Requests the source location identified by `ptr` from the client, unless
// it is already stored in m_sourceLocation or a request for it is already
// recorded in m_pendingSourceLocation.
void View::CheckSourceLocation( uint64_t ptr )
{
    const bool resolved = m_sourceLocation.count( ptr ) != 0;
    const bool requested = m_pendingSourceLocation.count( ptr ) != 0;
    if( resolved || requested ) return;

    // Remember the outstanding request so it is sent only once.
    m_pendingSourceLocation.emplace( ptr );

    // Query layout: one ServerQuery type byte followed by the 64-bit id.
    uint8_t query = ServerQuerySourceLocation;
    m_sock.Send( &query, sizeof( query ) );
    m_sock.Send( &ptr, sizeof( ptr ) );
}
void View::AddString( uint64_t ptr, std::string&& str )
{
assert( m_strings.find( ptr ) == m_strings.end() );
@ -351,6 +353,17 @@ void View::AddThreadString( uint64_t id, std::string&& str )
m_threadNames.emplace( id, std::move( str ) );
}
void View::AddSourceLocation( uint64_t ptr, const QueueSourceLocation& srcloc )
{
assert( m_sourceLocation.find( ptr ) == m_sourceLocation.end() );
auto it = m_pendingSourceLocation.find( ptr );
assert( it != m_pendingSourceLocation.end() );
m_pendingSourceLocation.erase( it );
CheckString( srcloc.file );
CheckString( srcloc.function );
std::lock_guard<std::mutex> lock( m_lock );
m_sourceLocation.emplace( ptr, srcloc );
}
void View::NewZone( Event* zone, uint64_t thread )
{
@ -1043,8 +1056,17 @@ int View::DrawZoneLevel( const Vector<Event*>& vec, bool hover, double pxns, con
}
else
{
const auto& srcFile = m_srcFile[ev.srcloc];
const char* func = GetString( srcFile.function );
const char* func = "???";
const char* filename = "???";
uint32_t line = 0;
auto srcit = m_sourceLocation.find( ev.srcloc );
if( srcit != m_sourceLocation.end() )
{
func = GetString( srcit->second.function );
filename = GetString( srcit->second.file );
line = srcit->second.line;
}
const auto tsz = ImGui::CalcTextSize( func );
const auto pr0 = ( ev.start - m_zvStart ) * pxns;
const auto pr1 = ( end - m_zvStart ) * pxns;
@ -1082,7 +1104,7 @@ int View::DrawZoneLevel( const Vector<Event*>& vec, bool hover, double pxns, con
{
ImGui::BeginTooltip();
ImGui::Text( "%s", func );
ImGui::Text( "%s:%i", GetString( srcFile.filename ), srcFile.line );
ImGui::Text( "%s:%i", filename, line );
ImGui::Text( "Execution time: %s", TimeToString( end - ev.start ) );
ImGui::Text( "Without profiling: %s", TimeToString( end - ev.start - m_delay ) );
ImGui::EndTooltip();

View File

@ -14,7 +14,6 @@
#include "../common/TracyQueue.hpp"
#include "TracyEvent.hpp"
#include "TracySlab.hpp"
#include "TracySourceLocation.hpp"
#include "TracyVector.hpp"
struct ImVec2;
@ -53,8 +52,11 @@ private:
void CheckString( uint64_t ptr );
void CheckThreadString( uint64_t id );
void CheckSourceLocation( uint64_t ptr );
void AddString( uint64_t ptr, std::string&& str );
void AddThreadString( uint64_t id, std::string&& str );
void AddSourceLocation( uint64_t id, const QueueSourceLocation& srcloc );
void NewZone( Event* zone, uint64_t thread );
void UpdateZone( Event* zone );
@ -87,10 +89,10 @@ private:
// this block must be locked
std::mutex m_lock;
Vector<uint64_t> m_frames;
Vector<SourceLocation> m_srcFile;
Vector<ThreadData> m_threads;
std::unordered_map<uint64_t, std::string> m_strings;
std::unordered_map<uint64_t, std::string> m_threadNames;
std::unordered_map<uint64_t, QueueSourceLocation> m_sourceLocation;
uint64_t m_zonesCnt;
std::mutex m_mbpslock;
@ -101,7 +103,7 @@ private:
std::unordered_map<uint64_t, Event*> m_openZones;
std::unordered_set<uint64_t> m_pendingStrings;
std::unordered_set<uint64_t> m_pendingThreads;
std::unordered_map<SourceLocation, uint32_t, SourceLocation::Hasher, SourceLocation::Comparator> m_locationRef;
std::unordered_set<uint64_t> m_pendingSourceLocation;
std::unordered_map<uint64_t, uint32_t> m_threadMap;
Slab<EventSize*1024*1024> m_slab;