tracy/client/TracyProfiler.hpp

577 lines
19 KiB
C++
Raw Normal View History

2017-09-10 15:43:56 +00:00
#ifndef __TRACYPROFILER_HPP__
#define __TRACYPROFILER_HPP__
2018-08-05 00:09:59 +00:00
#include <assert.h>
2017-09-10 15:43:56 +00:00
#include <atomic>
#include <chrono>
2017-09-10 18:08:42 +00:00
#include <stdint.h>
2017-10-14 15:20:37 +00:00
#include <string.h>
2017-09-10 15:43:56 +00:00
#include "concurrentqueue.h"
#include "TracyCallstack.hpp"
2019-02-21 20:59:02 +00:00
#include "TracySysTime.hpp"
2018-04-01 17:53:05 +00:00
#include "TracyFastVector.hpp"
2017-09-13 20:56:08 +00:00
#include "../common/TracyQueue.hpp"
#include "../common/TracyAlign.hpp"
#include "../common/TracyAlloc.hpp"
// Use the fastest mutex available. The selection is based on the following
// test results ("Lock test, 6 threads"; all values are ns/iter, columns give
// the number of contending threads, "none" meaning no contention).
//
// MSVC:
//                                   none          2          3          4          5          6
//   NonRecursiveBenaphore         11.641    141.559    242.733    409.807    561.544    785.845
//   std::mutex                    19.190     39.305     58.999     59.532    103.539    110.314
//   std::shared_timed_mutex       45.487     96.351    142.871    184.999    336.608    542.551
//   std::shared_mutex             10.861     17.495     31.126     40.468     15.677     64.505
//
// Cygwin (clang):
//                                   none          2          3          4          5          6
//   NonRecursiveBenaphore         11.536    121.082    396.430    672.555   1327.761  14151.955
//   std::mutex                    62.583   3990.464   7161.189   9870.820  12355.178  14694.903
//   std::shared_timed_mutex       91.687   1115.037   4183.792  15283.491  27812.477  35028.140
//   std::shared_mutex             91.764   1051.826   5574.720  15721.416  27721.487  35420.404
//
// Linux (x64):
//                                   none          2          3          4          5          6
//   NonRecursiveBenaphore         13.487    210.317    430.855    510.533   1003.609   1787.683
//   std::mutex                    12.403    157.122    186.791    265.073    283.778    270.687
//   std::shared_timed_mutex       21.509    150.179    256.574    415.351    611.532    944.695
//   std::shared_mutex             20.805    157.034    244.025    406.269    387.985    468.550
//
// Linux (arm64):
//                                   none          2          3          4          5          6
//   NonRecursiveBenaphore         20.891    211.037    409.962    657.441    828.405   1131.827
//   std::mutex                    50.884    103.620    332.429    620.802    783.943    834.002
//   std::shared_timed_mutex       64.948    173.191    490.352    660.668   1014.546   1451.553
//   std::shared_mutex             64.521    195.222    490.819    654.786    955.759   1282.544
2018-07-13 22:39:01 +00:00
#include "../common/TracyMutex.hpp"
2017-11-11 18:44:09 +00:00
#include "../common/TracySystem.hpp"
2017-09-10 18:09:14 +00:00
2019-01-19 11:03:30 +00:00
#if defined _WIN32 || defined __CYGWIN__
# include <intrin.h>
#endif
#ifdef __APPLE__
# include <TargetConditionals.h>
# include <mach/mach_time.h>
#endif
2019-01-19 11:03:30 +00:00
// Select the hardware timer path: Windows/Cygwin, ARMv6 or newer, and x86/x64
// targets that are not Android.
#if defined( _WIN32 ) || defined( __CYGWIN__ ) || __ARM_ARCH >= 6 || ( !defined( __ANDROID__ ) && ( defined( __i386 ) || defined( _M_IX86 ) || defined( __x86_64__ ) || defined( _M_X64 ) ) )
#  define TRACY_HW_TIMER
#  if defined( _WIN32 ) || defined( __CYGWIN__ )
     // Enable optimization for MSVC __rdtscp() intrin, saving one LHS of a cpu value on the stack.
     // This comes at the cost of an unaligned memory write.
#    define TRACY_RDTSCP_OPT
#  endif
#endif
2019-01-14 19:55:37 +00:00
// Token pasting helpers. TracyConcat goes through TracyConcatIndirect so that
// macro arguments are expanded before they are joined with ##.
#if !defined( TracyConcat )
#  define TracyConcat( lhs, rhs ) TracyConcatIndirect( lhs, rhs )
#endif
#if !defined( TracyConcatIndirect )
#  define TracyConcatIndirect( lhs, rhs ) lhs##rhs
#endif
2017-09-10 15:43:56 +00:00
namespace tracy
{
2019-02-19 18:33:37 +00:00
class GpuCtx;
2019-02-19 17:38:08 +00:00
class Profiler;
class Socket;
2019-06-17 00:25:09 +00:00
class UdpBroadcast;
2019-02-19 18:33:37 +00:00
struct GpuCtxWrapper
{
2019-02-19 18:33:37 +00:00
GpuCtx* ptr;
};
TRACY_API moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* GetToken();
TRACY_API Profiler& GetProfiler();
TRACY_API std::atomic<uint32_t>& GetLockCounter();
TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter();
TRACY_API GpuCtxWrapper& GetGpuCtx();
TRACY_API uint64_t GetThreadHandle();
2019-06-11 21:51:12 +00:00
TRACY_API void InitRPMallocThread();
2019-02-19 18:33:37 +00:00
// Plain record describing a source location: three C strings (name, function,
// file), a line number and a color value.
struct SourceLocationData
{
    const char* name;       // location name
    const char* function;   // function name string
    const char* file;       // file name string
    uint32_t line;          // line number
    uint32_t color;         // color value
};
2018-07-13 18:20:37 +00:00
#ifdef TRACY_ON_DEMAND
// State record that only exists in TRACY_ON_DEMAND builds: a 32-bit counter
// plus an "active" flag.
// NOTE(review): presumably used by the Lua bindings to track zones across
// connections; the users are not visible in this header - confirm.
struct LuaZoneState
{
uint32_t counter;
bool active;
};
#endif
2019-02-19 17:45:41 +00:00
using Magic = moodycamel::ConcurrentQueueDefaultTraits::index_t;
2017-10-03 12:50:55 +00:00
// Timer read function used by Profiler::GetTime() on ARMv6+ targets that are
// not iOS. The guard tests the VALUE of TARGET_OS_IOS, matching the
// `TARGET_OS_IOS == 1` test in GetTime(): TargetConditionals.h defines the
// macro as 0 on non-iOS Apple platforms, so a plain `!defined` check would hide
// this declaration while GetTime() still calls GetTimeImpl().
#if __ARM_ARCH >= 6 && !( defined TARGET_OS_IOS && TARGET_OS_IOS == 1 )
extern int64_t (*GetTimeImpl)();
#endif
2018-04-01 17:53:05 +00:00
2017-09-10 15:43:56 +00:00
class Profiler
{
2019-06-26 20:50:56 +00:00
// One pending frame image, as queued by SendFrameImage().
struct FrameImageQueueItem
{
    void* image;        // private copy of the pixel data (w * h * 4 bytes)
    uint64_t frame;     // frame counter value minus the caller-supplied offset
    uint16_t w;         // image width
    uint16_t h;         // image height
    uint8_t offset;     // NOTE(review): not written by SendFrameImage() - confirm whether it is used
    bool flip;          // flip flag passed by the caller
};
2017-09-10 15:43:56 +00:00
public:
Profiler();
~Profiler();
// Reads the current timestamp and reports a CPU value alongside it.
// cpu - receives the auxiliary value produced by RDTSCP on the Windows/Cygwin
//       and x86 paths; every other path stores 0xFFFFFFFF.
// Returns the raw value of whichever timer the build selects; without
// TRACY_HW_TIMER this is the high_resolution_clock time since epoch in ns.
static tracy_force_inline int64_t GetTime( uint32_t& cpu )
{
#ifndef TRACY_HW_TIMER
    cpu = 0xFFFFFFFF;
    const auto sinceEpoch = std::chrono::high_resolution_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::nanoseconds>( sinceEpoch ).count();
#elif TARGET_OS_IOS == 1
    cpu = 0xFFFFFFFF;
    return mach_absolute_time();
#elif __ARM_ARCH >= 6
    cpu = 0xFFFFFFFF;
    return GetTimeImpl();
#elif defined _WIN32 || defined __CYGWIN__
    return int64_t( __rdtscp( &cpu ) );
#elif defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
    // RDTSCP: low half in EAX, high half in EDX, auxiliary value in ECX.
    uint32_t lo, hi;
    asm volatile ( "rdtscp" : "=a" (lo), "=d" (hi), "=c" (cpu) :: );
    return ( uint64_t( hi ) << 32 ) + uint64_t( lo );
#endif
}
// Reads the current timestamp without reporting a CPU value.
// Returns the raw value of whichever timer the build selects; without
// TRACY_HW_TIMER this is the high_resolution_clock time since epoch in ns.
static tracy_force_inline int64_t GetTime()
{
#ifndef TRACY_HW_TIMER
    const auto sinceEpoch = std::chrono::high_resolution_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::nanoseconds>( sinceEpoch ).count();
#elif TARGET_OS_IOS == 1
    return mach_absolute_time();
#elif __ARM_ARCH >= 6
    return GetTimeImpl();
#elif defined _WIN32 || defined __CYGWIN__
    // The intrinsic insists on an output slot for the auxiliary value.
    unsigned int unusedAux;
    return int64_t( __rdtscp( &unusedAux ) );
#elif defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
    // ECX is written by RDTSCP as well, hence the clobber.
    uint32_t lo, hi;
    asm volatile ( "rdtscp" : "=a" (lo), "=d" (hi) :: "%ecx" );
    return ( uint64_t( hi ) << 32 ) + uint64_t( lo );
#endif
}
// Hands out the current zone id and advances the counter by one
// (relaxed atomic fetch-and-add on m_zoneId).
tracy_force_inline uint32_t GetNextZoneId()
{
    const uint32_t id = m_zoneId.fetch_add( 1, std::memory_order_relaxed );
    return id;
}
// Queues a FrameMarkMsg event on the calling thread's producer token.
// name - frame name; its pointer value is what gets stored in the event.
//        A null name additionally advances the profiler's frame counter,
//        which happens even when the on-demand connection check bails out.
static tracy_force_inline void SendFrameMark( const char* name )
{
    if( name == nullptr )
    {
        GetProfiler().m_frameCount.fetch_add( 1, std::memory_order_relaxed );
    }
#ifdef TRACY_ON_DEMAND
    if( !GetProfiler().IsConnected() ) return;
#endif
    Magic slot;
    auto producer = GetToken();
    auto& tailIndex = producer->get_tail_index();
    auto entry = producer->enqueue_begin<tracy::moodycamel::CanAlloc>( slot );
    MemWrite( &entry->hdr.type, QueueType::FrameMarkMsg );
    MemWrite( &entry->frameMark.time, GetTime() );
    MemWrite( &entry->frameMark.name, (uint64_t)name );
    // Release store on the tail index, issued after the item has been filled in.
    tailIndex.store( slot + 1, std::memory_order_release );
}
2017-09-10 18:09:14 +00:00
2018-08-05 00:09:59 +00:00
// Queues a frame start/end marker on the serial queue.
// name - frame name; its pointer value is stored in the event.
// type - must be FrameMarkMsgStart or FrameMarkMsgEnd (asserted).
static tracy_force_inline void SendFrameMark( const char* name, QueueType type )
{
    assert( type == QueueType::FrameMarkMsgStart || type == QueueType::FrameMarkMsgEnd );
#ifdef TRACY_ON_DEMAND
    if( !GetProfiler().IsConnected() ) return;
#endif
    auto& profiler = GetProfiler();
    // The serial queue is only touched while m_serialLock is held.
    profiler.m_serialLock.lock();
    auto entry = profiler.m_serialQueue.prepare_next();
    MemWrite( &entry->hdr.type, type );
    MemWrite( &entry->frameMark.time, GetTime() );
    MemWrite( &entry->frameMark.name, (uint64_t)name );
    profiler.m_serialQueue.commit_next();
    profiler.m_serialLock.unlock();
}
2019-06-12 13:28:32 +00:00
// Copies a frame image and appends it to the frame image queue.
// image  - source pixels; w * h * 4 bytes are read from it.
// w, h   - image dimensions.
// offset - subtracted from the current frame counter to get the frame number.
// flip   - stored with the queued item.
// NOTE(review): the queue item's `offset` member is left unwritten here.
static tracy_force_inline void SendFrameImage( void* image, uint16_t w, uint16_t h, uint8_t offset, bool flip )
{
    auto& prof = GetProfiler();
#ifdef TRACY_ON_DEMAND
    if( !prof.IsConnected() ) return;
#endif
    // Take a private copy, so the caller's buffer is not referenced after return.
    const size_t byteCount = size_t( w ) * size_t( h ) * 4;
    char* pixels = (char*)tracy_malloc( byteCount );
    memcpy( pixels, image, byteCount );

    prof.m_fiLock.lock();
    auto slot = prof.m_fiQueue.prepare_next();
    slot->image = pixels;
    slot->frame = prof.m_frameCount.load( std::memory_order_relaxed ) - offset;
    slot->w = w;
    slot->h = h;
    slot->flip = flip;
    prof.m_fiQueue.commit_next();
    prof.m_fiLock.unlock();
}
2017-10-13 00:21:29 +00:00
// Queues an integer plot sample.
// name - plot name; its pointer value is stored in the event.
// val  - sample value, tagged as PlotDataType::Int.
static tracy_force_inline void PlotData( const char* name, int64_t val )
{
#ifdef TRACY_ON_DEMAND
    if( !GetProfiler().IsConnected() ) return;
#endif
    Magic slot;
    auto producer = GetToken();
    auto& tailIndex = producer->get_tail_index();
    auto entry = producer->enqueue_begin<tracy::moodycamel::CanAlloc>( slot );
    MemWrite( &entry->hdr.type, QueueType::PlotData );
    MemWrite( &entry->plotData.name, uint64_t( name ) );
    MemWrite( &entry->plotData.time, GetTime() );
    MemWrite( &entry->plotData.type, PlotDataType::Int );
    MemWrite( &entry->plotData.data.i, val );
    // Release store on the tail index, issued after the item has been filled in.
    tailIndex.store( slot + 1, std::memory_order_release );
}
// Queues a single-precision plot sample.
// name - plot name; its pointer value is stored in the event.
// val  - sample value, tagged as PlotDataType::Float.
static tracy_force_inline void PlotData( const char* name, float val )
{
#ifdef TRACY_ON_DEMAND
    if( !GetProfiler().IsConnected() ) return;
#endif
    Magic slot;
    auto producer = GetToken();
    auto& tailIndex = producer->get_tail_index();
    auto entry = producer->enqueue_begin<tracy::moodycamel::CanAlloc>( slot );
    MemWrite( &entry->hdr.type, QueueType::PlotData );
    MemWrite( &entry->plotData.name, uint64_t( name ) );
    MemWrite( &entry->plotData.time, GetTime() );
    MemWrite( &entry->plotData.type, PlotDataType::Float );
    MemWrite( &entry->plotData.data.f, val );
    // Release store on the tail index, issued after the item has been filled in.
    tailIndex.store( slot + 1, std::memory_order_release );
}
2017-10-13 00:07:03 +00:00
// Queues a double-precision plot sample.
// name - plot name; its pointer value is stored in the event.
// val  - sample value, tagged as PlotDataType::Double.
static tracy_force_inline void PlotData( const char* name, double val )
{
#ifdef TRACY_ON_DEMAND
    if( !GetProfiler().IsConnected() ) return;
#endif
    Magic slot;
    auto producer = GetToken();
    auto& tailIndex = producer->get_tail_index();
    auto entry = producer->enqueue_begin<tracy::moodycamel::CanAlloc>( slot );
    MemWrite( &entry->hdr.type, QueueType::PlotData );
    MemWrite( &entry->plotData.name, uint64_t( name ) );
    MemWrite( &entry->plotData.time, GetTime() );
    MemWrite( &entry->plotData.type, PlotDataType::Double );
    MemWrite( &entry->plotData.data.d, val );
    // Release store on the tail index, issued after the item has been filled in.
    tailIndex.store( slot + 1, std::memory_order_release );
}
2017-10-14 11:23:13 +00:00
// Queues a text message, copying the text first.
// txt  - message bytes; need not be null-terminated.
// size - number of bytes to copy from txt; a terminator is appended to the copy.
// The copy is obtained from tracy_malloc and its address is stored in the event.
static tracy_force_inline void Message( const char* txt, size_t size )
{
#ifdef TRACY_ON_DEMAND
    if( !GetProfiler().IsConnected() ) return;
#endif
    Magic slot;
    const auto tid = GetThreadHandle();
    auto producer = GetToken();

    char* copy = (char*)tracy_malloc( size+1 );
    memcpy( copy, txt, size );
    copy[size] = '\0';

    auto& tailIndex = producer->get_tail_index();
    auto entry = producer->enqueue_begin<tracy::moodycamel::CanAlloc>( slot );
    MemWrite( &entry->hdr.type, QueueType::Message );
    MemWrite( &entry->message.time, GetTime() );
    MemWrite( &entry->message.thread, tid );
    MemWrite( &entry->message.text, uint64_t( copy ) );
    // Release store on the tail index, issued after the item has been filled in.
    tailIndex.store( slot + 1, std::memory_order_release );
}
2017-10-15 11:06:49 +00:00
// Queues a text message without copying it (MessageLiteral).
// txt - message text; only the pointer value is stored in the event, so the
//       string has to stay valid after this call returns.
static tracy_force_inline void Message( const char* txt )
{
#ifdef TRACY_ON_DEMAND
    if( !GetProfiler().IsConnected() ) return;
#endif
    Magic slot;
    const auto tid = GetThreadHandle();
    auto producer = GetToken();
    auto& tailIndex = producer->get_tail_index();
    auto entry = producer->enqueue_begin<tracy::moodycamel::CanAlloc>( slot );
    MemWrite( &entry->hdr.type, QueueType::MessageLiteral );
    MemWrite( &entry->message.time, GetTime() );
    MemWrite( &entry->message.thread, tid );
    MemWrite( &entry->message.text, uint64_t( txt ) );
    // Release store on the tail index, issued after the item has been filled in.
    tailIndex.store( slot + 1, std::memory_order_release );
}
2019-05-10 18:17:44 +00:00
// Queues a colored text message, copying the text first.
// txt   - message bytes; need not be null-terminated.
// size  - number of bytes to copy from txt; a terminator is appended to the copy.
// color - packed color: bits 0-7 go to r, bits 8-15 to g, bits 16-23 to b.
static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color )
{
#ifdef TRACY_ON_DEMAND
    if( !GetProfiler().IsConnected() ) return;
#endif
    Magic slot;
    const auto tid = GetThreadHandle();
    auto producer = GetToken();

    char* copy = (char*)tracy_malloc( size+1 );
    memcpy( copy, txt, size );
    copy[size] = '\0';

    auto& tailIndex = producer->get_tail_index();
    auto entry = producer->enqueue_begin<tracy::moodycamel::CanAlloc>( slot );
    MemWrite( &entry->hdr.type, QueueType::MessageColor );
    MemWrite( &entry->messageColor.time, GetTime() );
    MemWrite( &entry->messageColor.thread, tid );
    MemWrite( &entry->messageColor.text, uint64_t( copy ) );
    MemWrite( &entry->messageColor.r, uint8_t( color & 0xFF ) );
    MemWrite( &entry->messageColor.g, uint8_t( ( color >> 8 ) & 0xFF ) );
    MemWrite( &entry->messageColor.b, uint8_t( ( color >> 16 ) & 0xFF ) );
    // Release store on the tail index, issued after the item has been filled in.
    tailIndex.store( slot + 1, std::memory_order_release );
}
// Queues a colored text message without copying it (MessageLiteralColor).
// txt   - message text; only the pointer value is stored in the event.
// color - packed color: bits 0-7 go to r, bits 8-15 to g, bits 16-23 to b.
static tracy_force_inline void MessageColor( const char* txt, uint32_t color )
{
#ifdef TRACY_ON_DEMAND
    if( !GetProfiler().IsConnected() ) return;
#endif
    Magic slot;
    const auto tid = GetThreadHandle();
    auto producer = GetToken();
    auto& tailIndex = producer->get_tail_index();
    auto entry = producer->enqueue_begin<tracy::moodycamel::CanAlloc>( slot );
    MemWrite( &entry->hdr.type, QueueType::MessageLiteralColor );
    MemWrite( &entry->messageColor.time, GetTime() );
    MemWrite( &entry->messageColor.thread, tid );
    MemWrite( &entry->messageColor.text, uint64_t( txt ) );
    MemWrite( &entry->messageColor.r, uint8_t( color & 0xFF ) );
    MemWrite( &entry->messageColor.g, uint8_t( ( color >> 8 ) & 0xFF ) );
    MemWrite( &entry->messageColor.b, uint8_t( ( color >> 16 ) & 0xFF ) );
    // Release store on the tail index, issued after the item has been filled in.
    tailIndex.store( slot + 1, std::memory_order_release );
}
2018-03-31 19:56:05 +00:00
// Records a memory allocation event on the serial queue.
// ptr  - address of the allocation.
// size - allocation size in bytes.
static tracy_force_inline void MemAlloc( const void* ptr, size_t size )
{
#ifdef TRACY_ON_DEMAND
    if( !GetProfiler().IsConnected() ) return;
#endif
    const auto tid = GetThreadHandle();
    // SendMemAlloc() writes to the serial queue, so hold its lock around the call.
    auto& serialLock = GetProfiler().m_serialLock;
    serialLock.lock();
    SendMemAlloc( QueueType::MemAlloc, tid, ptr, size );
    serialLock.unlock();
}
// Records a memory free event on the serial queue.
// ptr - address being freed.
static tracy_force_inline void MemFree( const void* ptr )
{
#ifdef TRACY_ON_DEMAND
    if( !GetProfiler().IsConnected() ) return;
#endif
    const auto tid = GetThreadHandle();
    // SendMemFree() writes to the serial queue, so hold its lock around the call.
    auto& serialLock = GetProfiler().m_serialLock;
    serialLock.lock();
    SendMemFree( QueueType::MemFree, tid, ptr );
    serialLock.unlock();
}
// Records a memory allocation event together with a captured call stack.
// ptr   - address of the allocation.
// size  - allocation size in bytes.
// depth - passed to Callstack() when capturing the stack.
// Without TRACY_HAS_CALLSTACK this forwards to MemAlloc() and depth is ignored.
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth )
{
#ifdef TRACY_HAS_CALLSTACK
    // Fetched only on this path; the fallback below never needs the reference.
    auto& profiler = GetProfiler();
#  ifdef TRACY_ON_DEMAND
    if( !profiler.IsConnected() ) return;
#  endif
    const auto thread = GetThreadHandle();

    rpmalloc_thread_initialize();
    auto callstack = Callstack( depth );

    // Both items are queued under one lock, so they stay adjacent in the serial queue.
    profiler.m_serialLock.lock();
    SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
    SendCallstackMemory( callstack );
    profiler.m_serialLock.unlock();
#else
    static_cast<void>( depth );
    MemAlloc( ptr, size );
#endif
}
// Records a memory free event together with a captured call stack.
// ptr   - address being freed.
// depth - passed to Callstack() when capturing the stack.
// Without TRACY_HAS_CALLSTACK this forwards to MemFree() and depth is ignored.
static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth )
{
#ifdef TRACY_HAS_CALLSTACK
    // Fetched only on this path; the fallback below never needs the reference.
    auto& profiler = GetProfiler();
#  ifdef TRACY_ON_DEMAND
    if( !profiler.IsConnected() ) return;
#  endif
    const auto thread = GetThreadHandle();

    rpmalloc_thread_initialize();
    auto callstack = Callstack( depth );

    // Both items are queued under one lock, so they stay adjacent in the serial queue.
    profiler.m_serialLock.lock();
    SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
    SendCallstackMemory( callstack );
    profiler.m_serialLock.unlock();
#else
    static_cast<void>( depth );
    MemFree( ptr );
#endif
}
2018-06-21 22:56:01 +00:00
// Captures a call stack and queues it as a Callstack event.
// depth  - passed to Callstack() when capturing.
// thread - thread identifier stored with the event.
// Compiles to an empty function when TRACY_HAS_CALLSTACK is not defined.
static tracy_force_inline void SendCallstack( int depth, uint64_t thread )
{
#ifdef TRACY_HAS_CALLSTACK
    auto stack = Callstack( depth );
    Magic slot;
    auto producer = GetToken();
    auto& tailIndex = producer->get_tail_index();
    auto entry = producer->enqueue_begin<tracy::moodycamel::CanAlloc>( slot );
    MemWrite( &entry->hdr.type, QueueType::Callstack );
    MemWrite( &entry->callstack.ptr, stack );
    MemWrite( &entry->callstack.thread, thread );
    // Release store on the tail index, issued after the item has been filled in.
    tailIndex.store( slot + 1, std::memory_order_release );
#endif
}
void SendCallstack( int depth, uint64_t thread, const char* skipBefore );
static void CutCallstack( void* callstack, const char* skipBefore );
static bool ShouldExit();
2018-07-10 19:50:00 +00:00
#ifdef TRACY_ON_DEMAND
// Returns the current value of the m_isConnected flag (acquire load).
tracy_force_inline bool IsConnected() const
{
    const bool connected = m_isConnected.load( std::memory_order_acquire );
    return connected;
}
2018-07-11 10:21:39 +00:00
// Returns the current value of m_connectionId (acquire load).
tracy_force_inline uint64_t ConnectionId() const
{
    const uint64_t id = m_connectionId.load( std::memory_order_acquire );
    return id;
}
2018-07-11 10:21:39 +00:00
// Appends a byte-wise copy of `item` to the deferred queue, under m_deferredLock.
tracy_force_inline void DeferItem( const QueueItem& item )
{
    m_deferredLock.lock();
    auto slot = m_deferredQueue.push_next();
    memcpy( slot, &item, sizeof( QueueItem ) );
    m_deferredLock.unlock();
}
2018-07-10 19:50:00 +00:00
#endif
// Sets both the m_shutdown and m_shutdownManual flags (relaxed stores).
void RequestShutdown()
{
    m_shutdown.store( true, std::memory_order_relaxed );
    m_shutdownManual.store( true, std::memory_order_relaxed );
}
// Returns the current value of the m_shutdownFinished flag (relaxed load).
bool HasShutdownFinished() const
{
    const bool finished = m_shutdownFinished.load( std::memory_order_relaxed );
    return finished;
}
2017-09-10 15:43:56 +00:00
private:
2019-06-09 14:14:30 +00:00
enum class DequeueStatus { Success, ConnectionLost, QueueEmpty };
2017-10-18 16:48:51 +00:00
static void LaunchWorker( void* ptr ) { ((Profiler*)ptr)->Worker(); }
2017-09-10 15:43:56 +00:00
void Worker();
2019-06-26 20:57:24 +00:00
static void LaunchCompressWorker( void* ptr ) { ((Profiler*)ptr)->CompressWorker(); }
void CompressWorker();
void ClearQueues( tracy::moodycamel::ConsumerToken& token );
DequeueStatus Dequeue( tracy::moodycamel::ConsumerToken& token );
2018-04-01 18:04:35 +00:00
DequeueStatus DequeueSerial();
bool AppendData( const void* data, size_t len );
bool CommitData();
bool NeedDataSize( size_t len );
2017-10-18 16:48:51 +00:00
2018-06-23 00:16:58 +00:00
// Copies `len` bytes from `data` to m_buffer at the current offset and advances
// the offset. No capacity check is made here.
tracy_force_inline void AppendDataUnsafe( const void* data, size_t len )
{
    char* const dst = m_buffer + m_bufferOffset;
    memcpy( dst, data, len );
    m_bufferOffset = m_bufferOffset + int( len );
}
bool SendData( const char* data, size_t len );
2018-06-19 17:00:57 +00:00
void SendString( uint64_t ptr, const char* str, QueueType type );
void SendLongString( uint64_t ptr, const char* str, size_t len, QueueType type );
void SendSourceLocation( uint64_t ptr );
2018-06-19 17:00:57 +00:00
void SendSourceLocationPayload( uint64_t ptr );
2018-06-19 17:09:43 +00:00
void SendCallstackPayload( uint64_t ptr );
2019-02-28 19:30:07 +00:00
void SendCallstackAlloc( uint64_t ptr );
2018-06-19 23:06:31 +00:00
void SendCallstackFrame( uint64_t ptr );
bool HandleServerQuery();
2017-09-23 19:33:05 +00:00
void CalibrateTimer();
2017-09-24 14:02:09 +00:00
void CalibrateDelay();
2017-09-23 19:33:05 +00:00
// Appends a CallstackMemory item carrying `ptr` to the serial queue.
// No lock is taken here; the callers in this class hold m_serialLock.
// Compiles to an empty function when TRACY_HAS_CALLSTACK is not defined.
static tracy_force_inline void SendCallstackMemory( void* ptr )
{
#ifdef TRACY_HAS_CALLSTACK
    auto& serialQueue = GetProfiler().m_serialQueue;
    auto entry = serialQueue.prepare_next();
    MemWrite( &entry->hdr.type, QueueType::CallstackMemory );
    MemWrite( &entry->callstackMemory.ptr, uint64_t( ptr ) );
    serialQueue.commit_next();
#endif
}
2018-06-20 21:29:44 +00:00
// Appends a memory allocation item to the serial queue.
// type   - must be MemAlloc or MemAllocCallstack (asserted).
// thread - identifier of the allocating thread.
// ptr    - address of the allocation.
// size   - allocation size; stored in the 6-byte `memAlloc.size` field.
// No lock is taken here; the callers in this class hold m_serialLock.
static tracy_force_inline void SendMemAlloc( QueueType type, const uint64_t thread, const void* ptr, size_t size )
{
    assert( type == QueueType::MemAlloc || type == QueueType::MemAllocCallstack );

    auto item = GetProfiler().m_serialQueue.prepare_next();
    MemWrite( &item->hdr.type, type );
    MemWrite( &item->memAlloc.time, GetTime() );
    MemWrite( &item->memAlloc.thread, thread );
    MemWrite( &item->memAlloc.ptr, (uint64_t)ptr );

    // Address the size field byte-wise. Arithmetic directly on
    // `&item->memAlloc.size` would step in units of the whole field, so "+ 4"
    // would land far past the field instead of on its fifth byte.
    auto sizeBytes = (char*)&item->memAlloc.size;
    if( compile_time_condition<sizeof( size ) == 4>::value )
    {
        // 32-bit size_t: copy the 4 value bytes and zero the remaining 2.
        memcpy( sizeBytes, &size, 4 );
        memset( sizeBytes + 4, 0, 2 );
    }
    else
    {
        // 64-bit size_t: the first 6 bytes of the value are stored.
        assert( sizeof( size ) == 8 );
        memcpy( sizeBytes, &size, 6 );
    }
    GetProfiler().m_serialQueue.commit_next();
}
// Appends a memory free item to the serial queue.
// type   - must be MemFree or MemFreeCallstack (asserted).
// thread - identifier of the freeing thread.
// ptr    - address being freed.
// No lock is taken here; the callers in this class hold m_serialLock.
static tracy_force_inline void SendMemFree( QueueType type, const uint64_t thread, const void* ptr )
{
    assert( type == QueueType::MemFree || type == QueueType::MemFreeCallstack );

    auto& serialQueue = GetProfiler().m_serialQueue;
    auto entry = serialQueue.prepare_next();
    MemWrite( &entry->hdr.type, type );
    MemWrite( &entry->memFree.time, GetTime() );
    MemWrite( &entry->memFree.thread, thread );
    MemWrite( &entry->memFree.ptr, uint64_t( ptr ) );
    serialQueue.commit_next();
}
2017-09-23 19:33:05 +00:00
// Timer-related values.
// NOTE(review): presumably filled in by CalibrateTimer() / CalibrateDelay();
// their implementations are not visible in this header - confirm.
double m_timerMul;
2017-09-29 16:29:39 +00:00
uint64_t m_resolution;
2017-09-24 14:02:09 +00:00
uint64_t m_delay;
std::atomic<int64_t> m_timeBegin;
2017-09-22 23:37:07 +00:00
uint64_t m_mainThread;
uint64_t m_epoch;
2017-09-10 15:43:56 +00:00
// Shutdown flags: RequestShutdown() sets m_shutdown and m_shutdownManual,
// HasShutdownFinished() reads m_shutdownFinished.
std::atomic<bool> m_shutdown;
std::atomic<bool> m_shutdownManual;
std::atomic<bool> m_shutdownFinished;
2017-10-18 17:49:17 +00:00
Socket* m_sock;
2019-06-17 00:25:09 +00:00
UdpBroadcast* m_broadcast;
bool m_noExit;
// Counter handed out by GetNextZoneId().
std::atomic<uint32_t> m_zoneId;
void* m_stream; // LZ4_stream_t*
// Output buffer and write position; AppendDataUnsafe() copies to
// m_buffer + m_bufferOffset and advances m_bufferOffset.
char* m_buffer;
int m_bufferOffset;
int m_bufferStart;
2017-11-02 11:56:13 +00:00
QueueItem* m_itemBuf;
2017-11-02 16:37:10 +00:00
char* m_lz4Buf;
2018-04-01 17:53:05 +00:00
// m_serialQueue receives the items written by SendFrameMark( name, type ),
// SendMemAlloc(), SendMemFree() and SendCallstackMemory().
FastVector<QueueItem> m_serialQueue, m_serialDequeue;
Use the fastest mutex available. The selection is based on the following test results: MSVC: === Lock test, 6 threads === => NonRecursiveBenaphore No contention: 11.641 ns/iter 2 thread contention: 141.559 ns/iter 3 thread contention: 242.733 ns/iter 4 thread contention: 409.807 ns/iter 5 thread contention: 561.544 ns/iter 6 thread contention: 785.845 ns/iter => std::mutex No contention: 19.190 ns/iter 2 thread contention: 39.305 ns/iter 3 thread contention: 58.999 ns/iter 4 thread contention: 59.532 ns/iter 5 thread contention: 103.539 ns/iter 6 thread contention: 110.314 ns/iter => std::shared_timed_mutex No contention: 45.487 ns/iter 2 thread contention: 96.351 ns/iter 3 thread contention: 142.871 ns/iter 4 thread contention: 184.999 ns/iter 5 thread contention: 336.608 ns/iter 6 thread contention: 542.551 ns/iter => std::shared_mutex No contention: 10.861 ns/iter 2 thread contention: 17.495 ns/iter 3 thread contention: 31.126 ns/iter 4 thread contention: 40.468 ns/iter 5 thread contention: 15.677 ns/iter 6 thread contention: 64.505 ns/iter Cygwin (clang): === Lock test, 6 threads === => NonRecursiveBenaphore No contention: 11.536 ns/iter 2 thread contention: 121.082 ns/iter 3 thread contention: 396.430 ns/iter 4 thread contention: 672.555 ns/iter 5 thread contention: 1327.761 ns/iter 6 thread contention: 14151.955 ns/iter => std::mutex No contention: 62.583 ns/iter 2 thread contention: 3990.464 ns/iter 3 thread contention: 7161.189 ns/iter 4 thread contention: 9870.820 ns/iter 5 thread contention: 12355.178 ns/iter 6 thread contention: 14694.903 ns/iter => std::shared_timed_mutex No contention: 91.687 ns/iter 2 thread contention: 1115.037 ns/iter 3 thread contention: 4183.792 ns/iter 4 thread contention: 15283.491 ns/iter 5 thread contention: 27812.477 ns/iter 6 thread contention: 35028.140 ns/iter => std::shared_mutex No contention: 91.764 ns/iter 2 thread contention: 1051.826 ns/iter 3 thread contention: 5574.720 ns/iter 4 thread contention: 15721.416 ns/iter 
5 thread contention: 27721.487 ns/iter 6 thread contention: 35420.404 ns/iter Linux (x64): === Lock test, 6 threads === => NonRecursiveBenaphore No contention: 13.487 ns/iter 2 thread contention: 210.317 ns/iter 3 thread contention: 430.855 ns/iter 4 thread contention: 510.533 ns/iter 5 thread contention: 1003.609 ns/iter 6 thread contention: 1787.683 ns/iter => std::mutex No contention: 12.403 ns/iter 2 thread contention: 157.122 ns/iter 3 thread contention: 186.791 ns/iter 4 thread contention: 265.073 ns/iter 5 thread contention: 283.778 ns/iter 6 thread contention: 270.687 ns/iter => std::shared_timed_mutex No contention: 21.509 ns/iter 2 thread contention: 150.179 ns/iter 3 thread contention: 256.574 ns/iter 4 thread contention: 415.351 ns/iter 5 thread contention: 611.532 ns/iter 6 thread contention: 944.695 ns/iter => std::shared_mutex No contention: 20.805 ns/iter 2 thread contention: 157.034 ns/iter 3 thread contention: 244.025 ns/iter 4 thread contention: 406.269 ns/iter 5 thread contention: 387.985 ns/iter 6 thread contention: 468.550 ns/iter Linux (arm64): === Lock test, 6 threads === => NonRecursiveBenaphore No contention: 20.891 ns/iter 2 thread contention: 211.037 ns/iter 3 thread contention: 409.962 ns/iter 4 thread contention: 657.441 ns/iter 5 thread contention: 828.405 ns/iter 6 thread contention: 1131.827 ns/iter => std::mutex No contention: 50.884 ns/iter 2 thread contention: 103.620 ns/iter 3 thread contention: 332.429 ns/iter 4 thread contention: 620.802 ns/iter 5 thread contention: 783.943 ns/iter 6 thread contention: 834.002 ns/iter => std::shared_timed_mutex No contention: 64.948 ns/iter 2 thread contention: 173.191 ns/iter 3 thread contention: 490.352 ns/iter 4 thread contention: 660.668 ns/iter 5 thread contention: 1014.546 ns/iter 6 thread contention: 1451.553 ns/iter => std::shared_mutex No contention: 64.521 ns/iter 2 thread contention: 195.222 ns/iter 3 thread contention: 490.819 ns/iter 4 thread contention: 654.786 ns/iter 5 thread 
contention: 955.759 ns/iter 6 thread contention: 1282.544 ns/iter
2018-07-13 22:39:01 +00:00
// Held by the public Send*/Mem* entry points while they append to m_serialQueue.
TracyMutex m_serialLock;
2018-07-10 19:50:00 +00:00
2019-06-26 20:50:56 +00:00
// Frame image queue filled by SendFrameImage(), which holds m_fiLock while appending.
FastVector<FrameImageQueueItem> m_fiQueue, m_fiDequeue;
TracyMutex m_fiLock;
// Incremented by SendFrameMark() when called with a null name; read by
// SendFrameImage() to compute the frame number of an image.
std::atomic<uint64_t> m_frameCount;
2018-07-10 19:50:00 +00:00
#ifdef TRACY_ON_DEMAND
std::atomic<bool> m_isConnected;
std::atomic<uint64_t> m_connectionId;
2018-07-11 10:14:28 +00:00
Use the fastest mutex available. The selection is based on the following test results: MSVC: === Lock test, 6 threads === => NonRecursiveBenaphore No contention: 11.641 ns/iter 2 thread contention: 141.559 ns/iter 3 thread contention: 242.733 ns/iter 4 thread contention: 409.807 ns/iter 5 thread contention: 561.544 ns/iter 6 thread contention: 785.845 ns/iter => std::mutex No contention: 19.190 ns/iter 2 thread contention: 39.305 ns/iter 3 thread contention: 58.999 ns/iter 4 thread contention: 59.532 ns/iter 5 thread contention: 103.539 ns/iter 6 thread contention: 110.314 ns/iter => std::shared_timed_mutex No contention: 45.487 ns/iter 2 thread contention: 96.351 ns/iter 3 thread contention: 142.871 ns/iter 4 thread contention: 184.999 ns/iter 5 thread contention: 336.608 ns/iter 6 thread contention: 542.551 ns/iter => std::shared_mutex No contention: 10.861 ns/iter 2 thread contention: 17.495 ns/iter 3 thread contention: 31.126 ns/iter 4 thread contention: 40.468 ns/iter 5 thread contention: 15.677 ns/iter 6 thread contention: 64.505 ns/iter Cygwin (clang): === Lock test, 6 threads === => NonRecursiveBenaphore No contention: 11.536 ns/iter 2 thread contention: 121.082 ns/iter 3 thread contention: 396.430 ns/iter 4 thread contention: 672.555 ns/iter 5 thread contention: 1327.761 ns/iter 6 thread contention: 14151.955 ns/iter => std::mutex No contention: 62.583 ns/iter 2 thread contention: 3990.464 ns/iter 3 thread contention: 7161.189 ns/iter 4 thread contention: 9870.820 ns/iter 5 thread contention: 12355.178 ns/iter 6 thread contention: 14694.903 ns/iter => std::shared_timed_mutex No contention: 91.687 ns/iter 2 thread contention: 1115.037 ns/iter 3 thread contention: 4183.792 ns/iter 4 thread contention: 15283.491 ns/iter 5 thread contention: 27812.477 ns/iter 6 thread contention: 35028.140 ns/iter => std::shared_mutex No contention: 91.764 ns/iter 2 thread contention: 1051.826 ns/iter 3 thread contention: 5574.720 ns/iter 4 thread contention: 15721.416 ns/iter 
5 thread contention: 27721.487 ns/iter 6 thread contention: 35420.404 ns/iter Linux (x64): === Lock test, 6 threads === => NonRecursiveBenaphore No contention: 13.487 ns/iter 2 thread contention: 210.317 ns/iter 3 thread contention: 430.855 ns/iter 4 thread contention: 510.533 ns/iter 5 thread contention: 1003.609 ns/iter 6 thread contention: 1787.683 ns/iter => std::mutex No contention: 12.403 ns/iter 2 thread contention: 157.122 ns/iter 3 thread contention: 186.791 ns/iter 4 thread contention: 265.073 ns/iter 5 thread contention: 283.778 ns/iter 6 thread contention: 270.687 ns/iter => std::shared_timed_mutex No contention: 21.509 ns/iter 2 thread contention: 150.179 ns/iter 3 thread contention: 256.574 ns/iter 4 thread contention: 415.351 ns/iter 5 thread contention: 611.532 ns/iter 6 thread contention: 944.695 ns/iter => std::shared_mutex No contention: 20.805 ns/iter 2 thread contention: 157.034 ns/iter 3 thread contention: 244.025 ns/iter 4 thread contention: 406.269 ns/iter 5 thread contention: 387.985 ns/iter 6 thread contention: 468.550 ns/iter Linux (arm64): === Lock test, 6 threads === => NonRecursiveBenaphore No contention: 20.891 ns/iter 2 thread contention: 211.037 ns/iter 3 thread contention: 409.962 ns/iter 4 thread contention: 657.441 ns/iter 5 thread contention: 828.405 ns/iter 6 thread contention: 1131.827 ns/iter => std::mutex No contention: 50.884 ns/iter 2 thread contention: 103.620 ns/iter 3 thread contention: 332.429 ns/iter 4 thread contention: 620.802 ns/iter 5 thread contention: 783.943 ns/iter 6 thread contention: 834.002 ns/iter => std::shared_timed_mutex No contention: 64.948 ns/iter 2 thread contention: 173.191 ns/iter 3 thread contention: 490.352 ns/iter 4 thread contention: 660.668 ns/iter 5 thread contention: 1014.546 ns/iter 6 thread contention: 1451.553 ns/iter => std::shared_mutex No contention: 64.521 ns/iter 2 thread contention: 195.222 ns/iter 3 thread contention: 490.819 ns/iter 4 thread contention: 654.786 ns/iter 5 thread 
contention: 955.759 ns/iter 6 thread contention: 1282.544 ns/iter
2018-07-13 22:39:01 +00:00
TracyMutex m_deferredLock;
2018-07-11 10:14:28 +00:00
FastVector<QueueItem> m_deferredQueue;
2018-07-10 19:50:00 +00:00
#endif
2019-02-21 20:59:02 +00:00
// System time sampling support. With TRACY_HAS_SYSTIME, ProcessSysTime() is
// defined out of line and the class carries the sampler plus a "last" value
// initialized to 0; otherwise ProcessSysTime() is an empty inline stub, so
// callers need no conditional compilation of their own.
#ifdef TRACY_HAS_SYSTIME
void ProcessSysTime();
SysTime m_sysTime;
uint64_t m_sysTimeLast = 0;
#else
void ProcessSysTime() {}
#endif
2017-09-10 15:43:56 +00:00
};
};
#endif