tracy/public/client/TracySysTrace.cpp
Robert Adam ece8779362 Fix cpuid symbol redefinition on older GCC versions
Since commit 940f32c1a8, building the Tracy
library on Linux with a GCC version older than 11 would result in compile
errors due to symbol redefinitions of __get_cpuid_max, __get_cpuid and
__get_cpuid_count.

This is because prior to GCC 11 the cpuid.h header did not have any
include guards, so including it more than once produced the errors
mentioned above.

To work around this, the include of cpuid.h has been wrapped in a custom
header file that has its own include guards and thus shields cpuid.h from
being included multiple times (a minimal sketch of this approach is shown
below).

Fixes #452
2022-08-31 17:59:46 +02:00
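For reference, the workaround boils down to an include-guarded header wrapped
around cpuid.h, along these lines (a minimal sketch; the guard name and comment
are illustrative, and the actual TracyCpuid.hpp may differ in detail):

    #ifndef __TRACYCPUID_HPP__
    #define __TRACYCPUID_HPP__

    // GCC versions prior to 11 ship a cpuid.h without include guards, so
    // including it from more than one place redefines __get_cpuid_max,
    // __get_cpuid and __get_cpuid_count. Routing every include through this
    // guarded wrapper ensures cpuid.h is only pulled in once per translation
    // unit.
    #include <cpuid.h>

    #endif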

1592 lines
54 KiB
C++

#include "TracyDebug.hpp"
#include "TracyStringHelpers.hpp"
#include "TracySysTrace.hpp"
#include "../common/TracySystem.hpp"
#ifdef TRACY_HAS_SYSTEM_TRACING
#ifndef TRACY_SAMPLING_HZ
# if defined _WIN32
# define TRACY_SAMPLING_HZ 8000
# elif defined __linux__
# define TRACY_SAMPLING_HZ 10000
# endif
#endif
namespace tracy
{
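// Clamp the requested TRACY_SAMPLING_HZ to a usable range: at most 8 kHz on
// Windows, where ETW sampled profiling does not go much faster, and at most
// 1 MHz elsewhere.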
static constexpr int GetSamplingFrequency()
{
#if defined _WIN32
return TRACY_SAMPLING_HZ > 8000 ? 8000 : ( TRACY_SAMPLING_HZ < 1 ? 1 : TRACY_SAMPLING_HZ );
#else
return TRACY_SAMPLING_HZ > 1000000 ? 1000000 : ( TRACY_SAMPLING_HZ < 1 ? 1 : TRACY_SAMPLING_HZ );
#endif
}
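// Sampling period in nanoseconds per sample.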
static constexpr int GetSamplingPeriod()
{
return 1000000000 / GetSamplingFrequency();
}
}
# if defined _WIN32
# ifndef NOMINMAX
# define NOMINMAX
# endif
# define INITGUID
# include <assert.h>
# include <string.h>
# include <windows.h>
# include <dbghelp.h>
# include <evntrace.h>
# include <evntcons.h>
# include <psapi.h>
# include <winternl.h>
# include "../common/TracyAlloc.hpp"
# include "../common/TracySystem.hpp"
# include "TracyProfiler.hpp"
# include "TracyThread.hpp"
namespace tracy
{
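// ETW provider GUIDs consumed below: the kernel PerfInfo provider (sampled
// profile / stack walk events), the DxgKrnl provider (vsync events) and the
// classic Thread provider (context switches, thread wakeups).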
static const GUID PerfInfoGuid = { 0xce1dbfb4, 0x137e, 0x4da6, { 0x87, 0xb0, 0x3f, 0x59, 0xaa, 0x10, 0x2c, 0xbc } };
static const GUID DxgKrnlGuid = { 0x802ec45a, 0x1e99, 0x4b83, { 0x99, 0x20, 0x87, 0xc9, 0x82, 0x77, 0xba, 0x9d } };
static const GUID ThreadV2Guid = { 0x3d6fa8d1, 0xfe05, 0x11d0, { 0x9d, 0xda, 0x00, 0xc0, 0x4f, 0xd7, 0xba, 0x7c } };
static TRACEHANDLE s_traceHandle;
static TRACEHANDLE s_traceHandle2;
static EVENT_TRACE_PROPERTIES* s_prop;
static DWORD s_pid;
static EVENT_TRACE_PROPERTIES* s_propVsync;
static TRACEHANDLE s_traceHandleVsync;
static TRACEHANDLE s_traceHandleVsync2;
Thread* s_threadVsync = nullptr;
struct CSwitch
{
uint32_t newThreadId;
uint32_t oldThreadId;
int8_t newThreadPriority;
int8_t oldThreadPriority;
uint8_t previousCState;
int8_t spareByte;
int8_t oldThreadWaitReason;
int8_t oldThreadWaitMode;
int8_t oldThreadState;
int8_t oldThreadWaitIdealProcessor;
uint32_t newThreadWaitTime;
uint32_t reserved;
};
struct ReadyThread
{
uint32_t threadId;
int8_t adjustReason;
int8_t adjustIncrement;
int8_t flag;
int8_t reserved;
};
struct ThreadTrace
{
uint32_t processId;
uint32_t threadId;
uint32_t stackBase;
uint32_t stackLimit;
uint32_t userStackBase;
uint32_t userStackLimit;
uint32_t startAddr;
uint32_t win32StartAddr;
uint32_t tebBase;
uint32_t subProcessTag;
};
struct StackWalkEvent
{
uint64_t eventTimeStamp;
uint32_t stackProcess;
uint32_t stackThread;
uint64_t stack[192];
};
struct VSyncInfo
{
void* dxgAdapter;
uint32_t vidPnTargetId;
uint64_t scannedPhysicalAddress;
uint32_t vidPnSourceId;
uint32_t frameNumber;
int64_t frameQpcTime;
void* hFlipDevice;
uint32_t flipType;
uint64_t flipFenceId;
};
extern "C" typedef NTSTATUS (WINAPI *t_NtQueryInformationThread)( HANDLE, THREADINFOCLASS, PVOID, ULONG, PULONG );
extern "C" typedef BOOL (WINAPI *t_EnumProcessModules)( HANDLE, HMODULE*, DWORD, LPDWORD );
extern "C" typedef BOOL (WINAPI *t_GetModuleInformation)( HANDLE, HMODULE, LPMODULEINFO, DWORD );
extern "C" typedef DWORD (WINAPI *t_GetModuleBaseNameA)( HANDLE, HMODULE, LPSTR, DWORD );
extern "C" typedef HRESULT (WINAPI *t_GetThreadDescription)( HANDLE, PWSTR* );
t_NtQueryInformationThread NtQueryInformationThread = (t_NtQueryInformationThread)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "NtQueryInformationThread" );
t_EnumProcessModules _EnumProcessModules = (t_EnumProcessModules)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32EnumProcessModules" );
t_GetModuleInformation _GetModuleInformation = (t_GetModuleInformation)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32GetModuleInformation" );
t_GetModuleBaseNameA _GetModuleBaseNameA = (t_GetModuleBaseNameA)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32GetModuleBaseNameA" );
static t_GetThreadDescription _GetThreadDescription = 0;
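// Consumer callback for the kernel logger session. Events are dispatched on
// the first 32 bits of the provider GUID: thread provider events carry context
// switches, thread wakeups and TID-to-PID mappings, while stack walk events
// deliver callstack samples for this process.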
void WINAPI EventRecordCallback( PEVENT_RECORD record )
{
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
const auto& hdr = record->EventHeader;
switch( hdr.ProviderId.Data1 )
{
case 0x3d6fa8d1: // Thread Guid
if( hdr.EventDescriptor.Opcode == 36 )
{
const auto cswitch = (const CSwitch*)record->UserData;
TracyLfqPrepare( QueueType::ContextSwitch );
MemWrite( &item->contextSwitch.time, hdr.TimeStamp.QuadPart );
MemWrite( &item->contextSwitch.oldThread, cswitch->oldThreadId );
MemWrite( &item->contextSwitch.newThread, cswitch->newThreadId );
MemWrite( &item->contextSwitch.cpu, record->BufferContext.ProcessorNumber );
MemWrite( &item->contextSwitch.reason, cswitch->oldThreadWaitReason );
MemWrite( &item->contextSwitch.state, cswitch->oldThreadState );
TracyLfqCommit;
}
else if( hdr.EventDescriptor.Opcode == 50 )
{
const auto rt = (const ReadyThread*)record->UserData;
TracyLfqPrepare( QueueType::ThreadWakeup );
MemWrite( &item->threadWakeup.time, hdr.TimeStamp.QuadPart );
MemWrite( &item->threadWakeup.thread, rt->threadId );
TracyLfqCommit;
}
else if( hdr.EventDescriptor.Opcode == 1 || hdr.EventDescriptor.Opcode == 3 )
{
const auto tt = (const ThreadTrace*)record->UserData;
uint64_t tid = tt->threadId;
if( tid == 0 ) return;
uint64_t pid = tt->processId;
TracyLfqPrepare( QueueType::TidToPid );
MemWrite( &item->tidToPid.tid, tid );
MemWrite( &item->tidToPid.pid, pid );
TracyLfqCommit;
}
break;
case 0xdef2fe46: // StackWalk Guid
if( hdr.EventDescriptor.Opcode == 32 )
{
const auto sw = (const StackWalkEvent*)record->UserData;
if( sw->stackProcess == s_pid )
{
const uint64_t sz = ( record->UserDataLength - 16 ) / 8;
if( sz > 0 )
{
auto trace = (uint64_t*)tracy_malloc( ( 1 + sz ) * sizeof( uint64_t ) );
memcpy( trace, &sz, sizeof( uint64_t ) );
memcpy( trace+1, sw->stack, sizeof( uint64_t ) * sz );
TracyLfqPrepare( QueueType::CallstackSample );
MemWrite( &item->callstackSampleFat.time, sw->eventTimeStamp );
MemWrite( &item->callstackSampleFat.thread, sw->stackThread );
MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
TracyLfqCommit;
}
}
}
break;
default:
break;
}
}
void WINAPI EventRecordCallbackVsync( PEVENT_RECORD record )
{
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
const auto& hdr = record->EventHeader;
assert( hdr.ProviderId.Data1 == 0x802EC45A );
assert( hdr.EventDescriptor.Id == 0x0011 );
const auto vs = (const VSyncInfo*)record->UserData;
TracyLfqPrepare( QueueType::FrameVsync );
MemWrite( &item->frameVsync.time, hdr.TimeStamp.QuadPart );
MemWrite( &item->frameVsync.id, vs->vidPnTargetId );
TracyLfqCommit;
}
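// Starts a second real-time ETW session ("TracyVsync") that listens for the
// DxgKrnl VSyncDPC_Info event (id 0x0011) and processes it on a dedicated
// thread.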
static void SetupVsync()
{
#if _WIN32_WINNT >= _WIN32_WINNT_WINBLUE && !defined(__MINGW32__)
const auto psz = sizeof( EVENT_TRACE_PROPERTIES ) + MAX_PATH;
s_propVsync = (EVENT_TRACE_PROPERTIES*)tracy_malloc( psz );
memset( s_propVsync, 0, sizeof( EVENT_TRACE_PROPERTIES ) );
s_propVsync->LogFileMode = EVENT_TRACE_REAL_TIME_MODE;
s_propVsync->Wnode.BufferSize = psz;
#ifdef TRACY_TIMER_QPC
s_propVsync->Wnode.ClientContext = 1;
#else
s_propVsync->Wnode.ClientContext = 3;
#endif
s_propVsync->LoggerNameOffset = sizeof( EVENT_TRACE_PROPERTIES );
strcpy( ((char*)s_propVsync) + sizeof( EVENT_TRACE_PROPERTIES ), "TracyVsync" );
auto backup = tracy_malloc( psz );
memcpy( backup, s_propVsync, psz );
const auto controlStatus = ControlTraceA( 0, "TracyVsync", s_propVsync, EVENT_TRACE_CONTROL_STOP );
if( controlStatus != ERROR_SUCCESS && controlStatus != ERROR_WMI_INSTANCE_NOT_FOUND )
{
tracy_free( backup );
tracy_free( s_propVsync );
return;
}
memcpy( s_propVsync, backup, psz );
tracy_free( backup );
const auto startStatus = StartTraceA( &s_traceHandleVsync, "TracyVsync", s_propVsync );
if( startStatus != ERROR_SUCCESS )
{
tracy_free( s_propVsync );
return;
}
EVENT_FILTER_EVENT_ID fe = {};
fe.FilterIn = TRUE;
fe.Count = 1;
fe.Events[0] = 0x0011; // VSyncDPC_Info
EVENT_FILTER_DESCRIPTOR desc = {};
desc.Ptr = (ULONGLONG)&fe;
desc.Size = sizeof( fe );
desc.Type = EVENT_FILTER_TYPE_EVENT_ID;
ENABLE_TRACE_PARAMETERS params = {};
params.Version = ENABLE_TRACE_PARAMETERS_VERSION_2;
params.EnableProperty = EVENT_ENABLE_PROPERTY_IGNORE_KEYWORD_0;
params.SourceId = s_propVsync->Wnode.Guid;
params.EnableFilterDesc = &desc;
params.FilterDescCount = 1;
uint64_t mask = 0x4000000000000001; // Microsoft_Windows_DxgKrnl_Performance | Base
if( EnableTraceEx2( s_traceHandleVsync, &DxgKrnlGuid, EVENT_CONTROL_CODE_ENABLE_PROVIDER, TRACE_LEVEL_INFORMATION, mask, mask, 0, &params ) != ERROR_SUCCESS )
{
tracy_free( s_propVsync );
return;
}
char loggerName[MAX_PATH];
strcpy( loggerName, "TracyVsync" );
EVENT_TRACE_LOGFILEA log = {};
log.LoggerName = loggerName;
log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_EVENT_RECORD | PROCESS_TRACE_MODE_RAW_TIMESTAMP;
log.EventRecordCallback = EventRecordCallbackVsync;
s_traceHandleVsync2 = OpenTraceA( &log );
if( s_traceHandleVsync2 == (TRACEHANDLE)INVALID_HANDLE_VALUE )
{
CloseTrace( s_traceHandleVsync );
tracy_free( s_propVsync );
return;
}
s_threadVsync = (Thread*)tracy_malloc( sizeof( Thread ) );
new(s_threadVsync) Thread( [] (void*) {
ThreadExitHandler threadExitHandler;
SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL );
SetThreadName( "Tracy Vsync" );
ProcessTrace( &s_traceHandleVsync2, 1, nullptr, nullptr );
}, nullptr );
#endif
}
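// ETW expresses the sampled profile interval in 100 ns units, hence the
// conversion from nanoseconds below.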
static constexpr int GetSamplingInterval()
{
return GetSamplingPeriod() / 100;
}
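// Windows setup: acquire SeSystemProfilePrivilege, program the sampled-profile
// interval, stop any stale NT Kernel Logger session, start a fresh one with the
// requested event flags, enable stack tracing and open the trace for
// consumption.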
bool SysTraceStart( int64_t& samplingPeriod )
{
if( !_GetThreadDescription ) _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" );
s_pid = GetCurrentProcessId();
#if defined _WIN64
constexpr bool isOs64Bit = true;
#else
BOOL _iswow64;
IsWow64Process( GetCurrentProcess(), &_iswow64 );
const bool isOs64Bit = _iswow64;
#endif
TOKEN_PRIVILEGES priv = {};
priv.PrivilegeCount = 1;
priv.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
if( LookupPrivilegeValue( nullptr, SE_SYSTEM_PROFILE_NAME, &priv.Privileges[0].Luid ) == 0 ) return false;
HANDLE pt;
if( OpenProcessToken( GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES, &pt ) == 0 ) return false;
const auto adjust = AdjustTokenPrivileges( pt, FALSE, &priv, 0, nullptr, nullptr );
CloseHandle( pt );
if( adjust == 0 ) return false;
const auto status = GetLastError();
if( status != ERROR_SUCCESS ) return false;
if( isOs64Bit )
{
TRACE_PROFILE_INTERVAL interval = {};
interval.Interval = GetSamplingInterval();
const auto intervalStatus = TraceSetInformation( 0, TraceSampledProfileIntervalInfo, &interval, sizeof( interval ) );
if( intervalStatus != ERROR_SUCCESS ) return false;
samplingPeriod = GetSamplingPeriod();
}
const auto psz = sizeof( EVENT_TRACE_PROPERTIES ) + sizeof( KERNEL_LOGGER_NAME );
s_prop = (EVENT_TRACE_PROPERTIES*)tracy_malloc( psz );
memset( s_prop, 0, sizeof( EVENT_TRACE_PROPERTIES ) );
ULONG flags = 0;
#ifndef TRACY_NO_CONTEXT_SWITCH
flags = EVENT_TRACE_FLAG_CSWITCH | EVENT_TRACE_FLAG_DISPATCHER | EVENT_TRACE_FLAG_THREAD;
#endif
#ifndef TRACY_NO_SAMPLING
if( isOs64Bit ) flags |= EVENT_TRACE_FLAG_PROFILE;
#endif
s_prop->EnableFlags = flags;
s_prop->LogFileMode = EVENT_TRACE_REAL_TIME_MODE;
s_prop->Wnode.BufferSize = psz;
s_prop->Wnode.Flags = WNODE_FLAG_TRACED_GUID;
#ifdef TRACY_TIMER_QPC
s_prop->Wnode.ClientContext = 1;
#else
s_prop->Wnode.ClientContext = 3;
#endif
s_prop->Wnode.Guid = SystemTraceControlGuid;
s_prop->BufferSize = 1024;
s_prop->MinimumBuffers = std::thread::hardware_concurrency() * 4;
s_prop->MaximumBuffers = std::thread::hardware_concurrency() * 6;
s_prop->LoggerNameOffset = sizeof( EVENT_TRACE_PROPERTIES );
memcpy( ((char*)s_prop) + sizeof( EVENT_TRACE_PROPERTIES ), KERNEL_LOGGER_NAME, sizeof( KERNEL_LOGGER_NAME ) );
auto backup = tracy_malloc( psz );
memcpy( backup, s_prop, psz );
const auto controlStatus = ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP );
if( controlStatus != ERROR_SUCCESS && controlStatus != ERROR_WMI_INSTANCE_NOT_FOUND )
{
tracy_free( backup );
tracy_free( s_prop );
return false;
}
memcpy( s_prop, backup, psz );
tracy_free( backup );
const auto startStatus = StartTrace( &s_traceHandle, KERNEL_LOGGER_NAME, s_prop );
if( startStatus != ERROR_SUCCESS )
{
tracy_free( s_prop );
return false;
}
if( isOs64Bit )
{
CLASSIC_EVENT_ID stackId[2] = {};
stackId[0].EventGuid = PerfInfoGuid;
stackId[0].Type = 46;
stackId[1].EventGuid = ThreadV2Guid;
stackId[1].Type = 36;
const auto stackStatus = TraceSetInformation( s_traceHandle, TraceStackTracingInfo, &stackId, sizeof( stackId ) );
if( stackStatus != ERROR_SUCCESS )
{
tracy_free( s_prop );
return false;
}
}
#ifdef UNICODE
WCHAR KernelLoggerName[sizeof( KERNEL_LOGGER_NAME )];
#else
char KernelLoggerName[sizeof( KERNEL_LOGGER_NAME )];
#endif
memcpy( KernelLoggerName, KERNEL_LOGGER_NAME, sizeof( KERNEL_LOGGER_NAME ) );
EVENT_TRACE_LOGFILE log = {};
log.LoggerName = KernelLoggerName;
log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_EVENT_RECORD | PROCESS_TRACE_MODE_RAW_TIMESTAMP;
log.EventRecordCallback = EventRecordCallback;
s_traceHandle2 = OpenTrace( &log );
if( s_traceHandle2 == (TRACEHANDLE)INVALID_HANDLE_VALUE )
{
CloseTrace( s_traceHandle );
tracy_free( s_prop );
return false;
}
#ifndef TRACY_NO_VSYNC_CAPTURE
SetupVsync();
#endif
return true;
}
void SysTraceStop()
{
if( s_threadVsync )
{
CloseTrace( s_traceHandleVsync2 );
CloseTrace( s_traceHandleVsync );
s_threadVsync->~Thread();
tracy_free( s_threadVsync );
}
CloseTrace( s_traceHandle2 );
CloseTrace( s_traceHandle );
}
void SysTraceWorker( void* ptr )
{
ThreadExitHandler threadExitHandler;
SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL );
SetThreadName( "Tracy SysTrace" );
ProcessTrace( &s_traceHandle2, 1, 0, 0 );
ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP );
tracy_free( s_prop );
}
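// Resolves names for threads outside this process: tries GetThreadDescription
// first, falls back to the module containing the thread start address, and
// labels the owning process by its image file name.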
void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const char*& name )
{
bool threadSent = false;
auto hnd = OpenThread( THREAD_QUERY_INFORMATION, FALSE, DWORD( thread ) );
if( hnd == 0 )
{
hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, DWORD( thread ) );
}
if( hnd != 0 )
{
if( _GetThreadDescription )
{
PWSTR tmp;
_GetThreadDescription( hnd, &tmp );
char buf[256];
if( tmp )
{
auto ret = wcstombs( buf, tmp, 256 );
if( ret != 0 && ret != (size_t)-1 )  // wcstombs returns (size_t)-1 on conversion failure
{
threadName = CopyString( buf, ret );
threadSent = true;
}
}
}
const auto pid = GetProcessIdOfThread( hnd );
if( !threadSent && NtQueryInformationThread && _EnumProcessModules && _GetModuleInformation && _GetModuleBaseNameA )
{
void* ptr;
ULONG retlen;
auto status = NtQueryInformationThread( hnd, (THREADINFOCLASS)9 /*ThreadQuerySetWin32StartAddress*/, &ptr, sizeof( ptr ), &retlen );
if( status == 0 )
{
const auto phnd = OpenProcess( PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid );
if( phnd != nullptr )  // OpenProcess returns null on failure, not INVALID_HANDLE_VALUE
{
HMODULE modules[1024];
DWORD needed;
if( _EnumProcessModules( phnd, modules, 1024 * sizeof( HMODULE ), &needed ) != 0 )
{
const auto sz = std::min( DWORD( needed / sizeof( HMODULE ) ), DWORD( 1024 ) );
for( DWORD i=0; i<sz; i++ )
{
MODULEINFO info;
if( _GetModuleInformation( phnd, modules[i], &info, sizeof( info ) ) != 0 )
{
if( (uint64_t)ptr >= (uint64_t)info.lpBaseOfDll && (uint64_t)ptr <= (uint64_t)info.lpBaseOfDll + (uint64_t)info.SizeOfImage )
{
char buf2[1024];
const auto modlen = _GetModuleBaseNameA( phnd, modules[i], buf2, 1024 );
if( modlen != 0 )
{
threadName = CopyString( buf2, modlen );
threadSent = true;
}
}
}
}
}
CloseHandle( phnd );
}
}
}
CloseHandle( hnd );
if( !threadSent )
{
threadName = CopyString( "???", 3 );
threadSent = true;
}
if( pid != 0 )
{
{
uint64_t _pid = pid;
TracyLfqPrepare( QueueType::TidToPid );
MemWrite( &item->tidToPid.tid, thread );
MemWrite( &item->tidToPid.pid, _pid );
TracyLfqCommit;
}
if( pid == 4 )
{
name = CopyStringFast( "System", 6 );
return;
}
else
{
const auto phnd = OpenProcess( PROCESS_QUERY_LIMITED_INFORMATION, FALSE, pid );
if( phnd != nullptr )  // OpenProcess returns null on failure, not INVALID_HANDLE_VALUE
{
char buf2[1024];
const auto sz = GetProcessImageFileNameA( phnd, buf2, 1024 );
CloseHandle( phnd );
if( sz != 0 )
{
auto ptr = buf2 + sz - 1;
while( ptr > buf2 && *ptr != '\\' ) ptr--;
if( *ptr == '\\' ) ptr++;
name = CopyStringFast( ptr );
return;
}
}
}
}
}
if( !threadSent )
{
threadName = CopyString( "???", 3 );
}
name = CopyStringFast( "???", 3 );
}
}
# elif defined __linux__
# include <sys/types.h>
# include <sys/stat.h>
# include <sys/wait.h>
# include <fcntl.h>
# include <inttypes.h>
# include <limits>
# include <poll.h>
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <unistd.h>
# include <atomic>
# include <thread>
# include <linux/perf_event.h>
# include <linux/version.h>
# include <sys/mman.h>
# include <sys/ioctl.h>
# include <sys/syscall.h>
# if defined __i386 || defined __x86_64__
# include "TracyCpuid.hpp"
# endif
# include "TracyProfiler.hpp"
# include "TracyRingBuffer.hpp"
# include "TracyThread.hpp"
namespace tracy
{
static std::atomic<bool> traceActive { false };
static int s_numCpus = 0;
static int s_numBuffers = 0;
static int s_ctxBufferIdx = 0;
static RingBuffer* s_ring = nullptr;
static const int ThreadHashSize = 4 * 1024;
static uint32_t s_threadHash[ThreadHashSize] = {};
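// Small cache of "does this tid belong to the current process" answers. A
// positive entry marks the tid as ours, a negated entry as foreign; the answer
// is established by probing /proc/self/task/<tid>.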
static bool CurrentProcOwnsThread( uint32_t tid )
{
const auto hash = tid & ( ThreadHashSize-1 );
const auto hv = s_threadHash[hash];
if( hv == tid ) return true;
if( hv == -tid ) return false;
char path[256];
sprintf( path, "/proc/self/task/%d", tid );
struct stat st;
if( stat( path, &st ) == 0 )
{
s_threadHash[hash] = tid;
return true;
}
else
{
s_threadHash[hash] = -tid;
return false;
}
}
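// Thin wrapper around the perf_event_open(2) syscall, which has no glibc
// wrapper of its own.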
static int perf_event_open( struct perf_event_attr* hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags )
{
return syscall( __NR_perf_event_open, hw_event, pid, cpu, group_fd, flags );
}
enum TraceEventId
{
EventCallstack,
EventCpuCycles,
EventInstructionsRetired,
EventCacheReference,
EventCacheMiss,
EventBranchRetired,
EventBranchMiss,
EventVsync,
EventContextSwitch,
EventWakeup,
};
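// Probes the highest precise_ip (sample skid constraint) level the kernel and
// PMU will accept for the given event configuration, starting at 3 and backing
// off until the event can actually be opened.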
static void ProbePreciseIp( perf_event_attr& pe, unsigned long long config0, unsigned long long config1, pid_t pid )
{
pe.config = config1;
pe.precise_ip = 3;
while( pe.precise_ip != 0 )
{
const int fd = perf_event_open( &pe, pid, 0, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
close( fd );
break;
}
pe.precise_ip--;
}
pe.config = config0;
while( pe.precise_ip != 0 )
{
const int fd = perf_event_open( &pe, pid, 0, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
close( fd );
break;
}
pe.precise_ip--;
}
TracyDebug( " Probed precise_ip: %i\n", pe.precise_ip );
}
static void ProbePreciseIp( perf_event_attr& pe, pid_t pid )
{
pe.precise_ip = 3;
while( pe.precise_ip != 0 )
{
const int fd = perf_event_open( &pe, pid, 0, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
close( fd );
break;
}
pe.precise_ip--;
}
TracyDebug( " Probed precise_ip: %i\n", pe.precise_ip );
}
static bool IsGenuineIntel()
{
#if defined __i386 || defined __x86_64__
uint32_t regs[4] = {};
__get_cpuid( 0, regs, regs+1, regs+2, regs+3 );
char manufacturer[12];
memcpy( manufacturer, regs+1, 4 );
memcpy( manufacturer+4, regs+3, 4 );
memcpy( manufacturer+8, regs+2, 4 );
return memcmp( manufacturer, "GenuineIntel", 12 ) == 0;
#else
return false;
#endif
}
static const char* ReadFile( const char* path )
{
int fd = open( path, O_RDONLY );
if( fd < 0 ) return nullptr;
static char tmp[64];
const auto cnt = read( fd, tmp, 63 );
close( fd );
if( cnt < 0 ) return nullptr;
tmp[cnt] = '\0';
return tmp;
}
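// Linux setup: opens one perf_event ring buffer per CPU for software callstack
// sampling, optionally adds hardware counters (cycles, retired instructions,
// cache and branch events) and hooks the sched_switch, sched_wakeup and
// drm_vblank_event tracepoints for context switches, wakeups and vsync.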
bool SysTraceStart( int64_t& samplingPeriod )
{
#ifndef CLOCK_MONOTONIC_RAW
return false;
#endif
const auto paranoidLevelStr = ReadFile( "/proc/sys/kernel/perf_event_paranoid" );
if( !paranoidLevelStr ) return false;
#ifdef TRACY_VERBOSE
int paranoidLevel = 2;
paranoidLevel = atoi( paranoidLevelStr );
TracyDebug( "perf_event_paranoid: %i\n", paranoidLevel );
#endif
int switchId = -1, wakeupId = -1, vsyncId = -1;
const auto switchIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_switch/id" );
if( switchIdStr ) switchId = atoi( switchIdStr );
const auto wakeupIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_wakeup/id" );
if( wakeupIdStr ) wakeupId = atoi( wakeupIdStr );
const auto vsyncIdStr = ReadFile( "/sys/kernel/debug/tracing/events/drm/drm_vblank_event/id" );
if( vsyncIdStr ) vsyncId = atoi( vsyncIdStr );
TracyDebug( "sched_switch id: %i\n", switchId );
TracyDebug( "sched_wakeup id: %i\n", wakeupId );
TracyDebug( "drm_vblank_event id: %i\n", vsyncId );
#ifdef TRACY_NO_SAMPLE_RETIREMENT
const bool noRetirement = true;
#else
const char* noRetirementEnv = GetEnvVar( "TRACY_NO_SAMPLE_RETIREMENT" );
const bool noRetirement = noRetirementEnv && noRetirementEnv[0] == '1';
#endif
#ifdef TRACY_NO_SAMPLE_CACHE
const bool noCache = true;
#else
const char* noCacheEnv = GetEnvVar( "TRACY_NO_SAMPLE_CACHE" );
const bool noCache = noCacheEnv && noCacheEnv[0] == '1';
#endif
#ifdef TRACY_NO_SAMPLE_BRANCH
const bool noBranch = true;
#else
const char* noBranchEnv = GetEnvVar( "TRACY_NO_SAMPLE_BRANCH" );
const bool noBranch = noBranchEnv && noBranchEnv[0] == '1';
#endif
#ifdef TRACY_NO_CONTEXT_SWITCH
const bool noCtxSwitch = true;
#else
const char* noCtxSwitchEnv = GetEnvVar( "TRACY_NO_CONTEXT_SWITCH" );
const bool noCtxSwitch = noCtxSwitchEnv && noCtxSwitchEnv[0] == '1';
#endif
#ifdef TRACY_NO_VSYNC_CAPTURE
const bool noVsync = true;
#else
const char* noVsyncEnv = GetEnvVar( "TRACY_NO_VSYNC_CAPTURE" );
const bool noVsync = noVsyncEnv && noVsyncEnv[0] == '1';
#endif
samplingPeriod = GetSamplingPeriod();
uint32_t currentPid = (uint32_t)getpid();
s_numCpus = (int)std::thread::hardware_concurrency();
const auto maxNumBuffers = s_numCpus * (
1 + // software sampling
2 + // CPU cycles + instructions retired
2 + // cache reference + miss
2 + // branch retired + miss
2 + // context switches + wakeups
1 // vsync
);
s_ring = (RingBuffer*)tracy_malloc( sizeof( RingBuffer ) * maxNumBuffers );
s_numBuffers = 0;
// software sampling
perf_event_attr pe = {};
pe.type = PERF_TYPE_SOFTWARE;
pe.size = sizeof( perf_event_attr );
pe.config = PERF_COUNT_SW_CPU_CLOCK;
pe.sample_freq = GetSamplingFrequency();
pe.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CALLCHAIN;
#if LINUX_VERSION_CODE >= KERNEL_VERSION( 4, 8, 0 )
pe.sample_max_stack = 127;
#endif
pe.disabled = 1;
pe.freq = 1;
pe.inherit = 1;
#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
pe.use_clockid = 1;
pe.clockid = CLOCK_MONOTONIC_RAW;
#endif
TracyDebug( "Setup software sampling\n" );
ProbePreciseIp( pe, currentPid );
for( int i=0; i<s_numCpus; i++ )
{
int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd == -1 )
{
pe.exclude_kernel = 1;
ProbePreciseIp( pe, currentPid );
fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd == -1 )
{
TracyDebug( " Failed to setup!\n");
break;
}
TracyDebug( " No access to kernel samples\n" );
}
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCallstack );
if( s_ring[s_numBuffers].IsValid() )
{
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
}
// CPU cycles + instructions retired
pe = {};
pe.type = PERF_TYPE_HARDWARE;
pe.size = sizeof( perf_event_attr );
pe.sample_freq = 5000;
pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TIME;
pe.disabled = 1;
pe.exclude_kernel = 1;
pe.exclude_guest = 1;
pe.exclude_hv = 1;
pe.freq = 1;
pe.inherit = 1;
#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
pe.use_clockid = 1;
pe.clockid = CLOCK_MONOTONIC_RAW;
#endif
if( !noRetirement )
{
TracyDebug( "Setup sampling cycles + retirement\n" );
ProbePreciseIp( pe, PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS, currentPid );
for( int i=0; i<s_numCpus; i++ )
{
const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCpuCycles );
if( s_ring[s_numBuffers].IsValid() )
{
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
}
}
pe.config = PERF_COUNT_HW_INSTRUCTIONS;
for( int i=0; i<s_numCpus; i++ )
{
const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventInstructionsRetired );
if( s_ring[s_numBuffers].IsValid() )
{
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
}
}
}
// cache reference + miss
if( !noCache )
{
TracyDebug( "Setup sampling CPU cache references + misses\n" );
ProbePreciseIp( pe, PERF_COUNT_HW_CACHE_REFERENCES, PERF_COUNT_HW_CACHE_MISSES, currentPid );
if( IsGenuineIntel() )
{
pe.precise_ip = 0;
TracyDebug( " CPU is GenuineIntel, forcing precise_ip down to 0\n" );
}
for( int i=0; i<s_numCpus; i++ )
{
const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCacheReference );
if( s_ring[s_numBuffers].IsValid() )
{
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
}
}
pe.config = PERF_COUNT_HW_CACHE_MISSES;
for( int i=0; i<s_numCpus; i++ )
{
const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCacheMiss );
if( s_ring[s_numBuffers].IsValid() )
{
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
}
}
}
// branch retired + miss
if( !noBranch )
{
TracyDebug( "Setup sampling CPU branch retirements + misses\n" );
ProbePreciseIp( pe, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, PERF_COUNT_HW_BRANCH_MISSES, currentPid );
for( int i=0; i<s_numCpus; i++ )
{
const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventBranchRetired );
if( s_ring[s_numBuffers].IsValid() )
{
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
}
}
pe.config = PERF_COUNT_HW_BRANCH_MISSES;
for( int i=0; i<s_numCpus; i++ )
{
const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventBranchMiss );
if( s_ring[s_numBuffers].IsValid() )
{
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
}
}
}
s_ctxBufferIdx = s_numBuffers;
// vsync
if( !noVsync && vsyncId != -1 )
{
pe = {};
pe.type = PERF_TYPE_TRACEPOINT;
pe.size = sizeof( perf_event_attr );
pe.sample_period = 1;
pe.sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_RAW;
pe.disabled = 1;
pe.config = vsyncId;
#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
pe.use_clockid = 1;
pe.clockid = CLOCK_MONOTONIC_RAW;
#endif
TracyDebug( "Setup vsync capture\n" );
for( int i=0; i<s_numCpus; i++ )
{
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventVsync, i );
if( s_ring[s_numBuffers].IsValid() )
{
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
}
}
}
// context switches
if( !noCtxSwitch && switchId != -1 )
{
pe = {};
pe.type = PERF_TYPE_TRACEPOINT;
pe.size = sizeof( perf_event_attr );
pe.sample_period = 1;
pe.sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
#if LINUX_VERSION_CODE >= KERNEL_VERSION( 4, 8, 0 )
pe.sample_max_stack = 127;
#endif
pe.disabled = 1;
pe.inherit = 1;
pe.config = switchId;
#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
pe.use_clockid = 1;
pe.clockid = CLOCK_MONOTONIC_RAW;
#endif
TracyDebug( "Setup context switch capture\n" );
for( int i=0; i<s_numCpus; i++ )
{
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer( 256*1024, fd, EventContextSwitch, i );
if( s_ring[s_numBuffers].IsValid() )
{
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
}
}
if( wakeupId != -1 )
{
pe.config = wakeupId;
pe.sample_type &= ~PERF_SAMPLE_CALLCHAIN;   // wakeup events do not need callstacks
TracyDebug( "Setup wakeup capture\n" );
for( int i=0; i<s_numCpus; i++ )
{
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventWakeup, i );
if( s_ring[s_numBuffers].IsValid() )
{
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
}
}
}
}
TracyDebug( "Ringbuffers in use: %i\n", s_numBuffers );
traceActive.store( true, std::memory_order_relaxed );
return true;
}
void SysTraceStop()
{
traceActive.store( false, std::memory_order_relaxed );
}
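// Copies a callchain of cnt frames out of the ring buffer into a heap block
// whose first element stores the frame count. On x86-64, trailing non-canonical
// addresses are trimmed and interior ones zeroed out, and PERF_CONTEXT_* marker
// entries are stripped.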
static uint64_t* GetCallstackBlock( uint64_t cnt, RingBuffer& ring, uint64_t offset )
{
auto trace = (uint64_t*)tracy_malloc_fast( ( 1 + cnt ) * sizeof( uint64_t ) );
ring.Read( trace+1, offset, sizeof( uint64_t ) * cnt );
#if defined __x86_64__ || defined _M_X64
// remove non-canonical pointers
do
{
const auto test = (int64_t)trace[cnt];
const auto m1 = test >> 63;
const auto m2 = test >> 47;
if( m1 == m2 ) break;
}
while( --cnt > 0 );
for( uint64_t j=1; j<cnt; j++ )
{
const auto test = (int64_t)trace[j];
const auto m1 = test >> 63;
const auto m2 = test >> 47;
if( m1 != m2 ) trace[j] = 0;
}
#endif
for( uint64_t j=1; j<=cnt; j++ )
{
if( trace[j] >= (uint64_t)-4095 ) // PERF_CONTEXT_MAX
{
memmove( trace+j, trace+j+1, sizeof( uint64_t ) * ( cnt - j ) );
cnt--;
}
}
memcpy( trace, &cnt, sizeof( uint64_t ) );
return trace;
}
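// Worker thread that drains the per-CPU ring buffers. Sampling buffers are
// processed independently; the context switch / wakeup / vsync buffers are
// merged by always consuming the pending event with the lowest timestamp, so
// the resulting stream stays ordered.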
void SysTraceWorker( void* ptr )
{
ThreadExitHandler threadExitHandler;
SetThreadName( "Tracy Sampling" );
InitRpmalloc();
sched_param sp = { 99 };
if( pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp ) != 0 ) TracyDebug( "Failed to increase SysTraceWorker thread priority!\n" );
auto ctxBufferIdx = s_ctxBufferIdx;
auto ringArray = s_ring;
auto numBuffers = s_numBuffers;
for( int i=0; i<numBuffers; i++ ) ringArray[i].Enable();
for(;;)
{
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() )
{
if( !traceActive.load( std::memory_order_relaxed ) ) break;
for( int i=0; i<numBuffers; i++ )
{
auto& ring = ringArray[i];
const auto head = ring.LoadHead();
const auto tail = ring.GetTail();
if( head != tail )
{
const auto end = head - tail;
ring.Advance( end );
}
}
if( !traceActive.load( std::memory_order_relaxed ) ) break;
std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
continue;
}
#endif
bool hadData = false;
for( int i=0; i<ctxBufferIdx; i++ )
{
if( !traceActive.load( std::memory_order_relaxed ) ) break;
auto& ring = ringArray[i];
const auto head = ring.LoadHead();
const auto tail = ring.GetTail();
if( head == tail ) continue;
assert( head > tail );
hadData = true;
const auto id = ring.GetId();
assert( id != EventContextSwitch );
const auto end = head - tail;
uint64_t pos = 0;
if( id == EventCallstack )
{
while( pos < end )
{
perf_event_header hdr;
ring.Read( &hdr, pos, sizeof( perf_event_header ) );
if( hdr.type == PERF_RECORD_SAMPLE )
{
auto offset = pos + sizeof( perf_event_header );
// Layout:
// u32 pid, tid
// u64 time
// u64 cnt
// u64 ip[cnt]
uint32_t tid;
uint64_t t0;
uint64_t cnt;
offset += sizeof( uint32_t );
ring.Read( &tid, offset, sizeof( uint32_t ) );
offset += sizeof( uint32_t );
ring.Read( &t0, offset, sizeof( uint64_t ) );
offset += sizeof( uint64_t );
ring.Read( &cnt, offset, sizeof( uint64_t ) );
offset += sizeof( uint64_t );
if( cnt > 0 )
{
#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
t0 = ring.ConvertTimeToTsc( t0 );
#endif
auto trace = GetCallstackBlock( cnt, ring, offset );
TracyLfqPrepare( QueueType::CallstackSample );
MemWrite( &item->callstackSampleFat.time, t0 );
MemWrite( &item->callstackSampleFat.thread, tid );
MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
TracyLfqCommit;
}
}
pos += hdr.size;
}
}
else
{
while( pos < end )
{
perf_event_header hdr;
ring.Read( &hdr, pos, sizeof( perf_event_header ) );
if( hdr.type == PERF_RECORD_SAMPLE )
{
auto offset = pos + sizeof( perf_event_header );
// Layout:
// u64 ip
// u64 time
uint64_t ip, t0;
ring.Read( &ip, offset, sizeof( uint64_t ) );
offset += sizeof( uint64_t );
ring.Read( &t0, offset, sizeof( uint64_t ) );
#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
t0 = ring.ConvertTimeToTsc( t0 );
#endif
QueueType type;
switch( id )
{
case EventCpuCycles:
type = QueueType::HwSampleCpuCycle;
break;
case EventInstructionsRetired:
type = QueueType::HwSampleInstructionRetired;
break;
case EventCacheReference:
type = QueueType::HwSampleCacheReference;
break;
case EventCacheMiss:
type = QueueType::HwSampleCacheMiss;
break;
case EventBranchRetired:
type = QueueType::HwSampleBranchRetired;
break;
case EventBranchMiss:
type = QueueType::HwSampleBranchMiss;
break;
default:
assert( false );
break;
}
TracyLfqPrepare( type );
MemWrite( &item->hwSample.ip, ip );
MemWrite( &item->hwSample.time, t0 );
TracyLfqCommit;
}
pos += hdr.size;
}
}
assert( pos == end );
ring.Advance( end );
}
if( !traceActive.load( std::memory_order_relaxed ) ) break;
if( ctxBufferIdx != numBuffers )
{
const auto ctxBufNum = numBuffers - ctxBufferIdx;
int activeNum = 0;
uint16_t active[512];
uint32_t end[512];
uint32_t pos[512];
for( int i=0; i<ctxBufNum; i++ )
{
const auto rbIdx = ctxBufferIdx + i;
const auto rbHead = ringArray[rbIdx].LoadHead();
const auto rbTail = ringArray[rbIdx].GetTail();
const auto rbActive = rbHead != rbTail;
if( rbActive )
{
active[activeNum] = (uint16_t)i;
activeNum++;
end[i] = rbHead - rbTail;
pos[i] = 0;
}
else
{
end[i] = 0;
}
}
if( activeNum > 0 )
{
hadData = true;
while( activeNum > 0 )
{
int sel = -1;
int selPos;
int64_t t0 = std::numeric_limits<int64_t>::max();
for( int i=0; i<activeNum; i++ )
{
auto idx = active[i];
auto rbPos = pos[idx];
assert( rbPos < end[idx] );
const auto rbIdx = ctxBufferIdx + idx;
perf_event_header hdr;
ringArray[rbIdx].Read( &hdr, rbPos, sizeof( perf_event_header ) );
if( hdr.type == PERF_RECORD_SAMPLE )
{
int64_t rbTime;
ringArray[rbIdx].Read( &rbTime, rbPos + sizeof( perf_event_header ), sizeof( int64_t ) );
if( rbTime < t0 )
{
t0 = rbTime;
sel = idx;
selPos = i;
}
}
else
{
rbPos += hdr.size;
if( rbPos == end[idx] )
{
memmove( active+i, active+i+1, sizeof(*active) * ( activeNum - i - 1 ) );
activeNum--;
i--;
}
else
{
pos[idx] = rbPos;
}
}
}
if( sel >= 0 )
{
auto& ring = ringArray[ctxBufferIdx + sel];
auto rbPos = pos[sel];
auto offset = rbPos;
perf_event_header hdr;
ring.Read( &hdr, offset, sizeof( perf_event_header ) );
#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
t0 = ring.ConvertTimeToTsc( t0 );
#endif
const auto rid = ring.GetId();
if( rid == EventContextSwitch )
{
// Layout:
// u64 time
// u64 cnt
// u64 ip[cnt]
// u32 size
// u8 data[size]
// Data (not ABI stable, but has not changed since it was added, in 2009):
// u8 hdr[8]
// u8 prev_comm[16]
// u32 prev_pid
// u32 prev_prio
// lng prev_state
// u8 next_comm[16]
// u32 next_pid
// u32 next_prio
offset += sizeof( perf_event_header ) + sizeof( uint64_t );
uint64_t cnt;
ring.Read( &cnt, offset, sizeof( uint64_t ) );
offset += sizeof( uint64_t );
const auto traceOffset = offset;
offset += sizeof( uint64_t ) * cnt + sizeof( uint32_t ) + 8 + 16;
uint32_t prev_pid, next_pid;
long prev_state;
ring.Read( &prev_pid, offset, sizeof( uint32_t ) );
offset += sizeof( uint32_t ) + sizeof( uint32_t );
ring.Read( &prev_state, offset, sizeof( long ) );
offset += sizeof( long ) + 16;
ring.Read( &next_pid, offset, sizeof( uint32_t ) );
uint8_t reason = 100;
uint8_t state;
if( prev_state & 0x0001 ) state = 104;
else if( prev_state & 0x0002 ) state = 101;
else if( prev_state & 0x0004 ) state = 105;
else if( prev_state & 0x0008 ) state = 106;
else if( prev_state & 0x0010 ) state = 108;
else if( prev_state & 0x0020 ) state = 109;
else if( prev_state & 0x0040 ) state = 110;
else if( prev_state & 0x0080 ) state = 102;
else state = 103;
TracyLfqPrepare( QueueType::ContextSwitch );
MemWrite( &item->contextSwitch.time, t0 );
MemWrite( &item->contextSwitch.oldThread, prev_pid );
MemWrite( &item->contextSwitch.newThread, next_pid );
MemWrite( &item->contextSwitch.cpu, uint8_t( ring.GetCpu() ) );
MemWrite( &item->contextSwitch.reason, reason );
MemWrite( &item->contextSwitch.state, state );
TracyLfqCommit;
if( cnt > 0 && prev_pid != 0 && CurrentProcOwnsThread( prev_pid ) )
{
auto trace = GetCallstackBlock( cnt, ring, traceOffset );
TracyLfqPrepare( QueueType::CallstackSampleContextSwitch );
MemWrite( &item->callstackSampleFat.time, t0 );
MemWrite( &item->callstackSampleFat.thread, prev_pid );
MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
TracyLfqCommit;
}
}
else if( rid == EventWakeup )
{
// Layout:
// u64 time
// u32 size
// u8 data[size]
// Data:
// u8 hdr[8]
// u8 comm[16]
// u32 pid
// u32 prio
// u64 target_cpu
offset += sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ) + 8 + 16;
uint32_t pid;
ring.Read( &pid, offset, sizeof( uint32_t ) );
TracyLfqPrepare( QueueType::ThreadWakeup );
MemWrite( &item->threadWakeup.time, t0 );
MemWrite( &item->threadWakeup.thread, pid );
TracyLfqCommit;
}
else
{
assert( rid == EventVsync );
// Layout:
// u64 time
// u32 size
// u8 data[size]
// Data (not ABI stable):
// u8 hdr[8]
// i32 crtc
// u32 seq
// i64 ktime
// u8 high precision
offset += sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ) + 8;
int32_t crtc;
ring.Read( &crtc, offset, sizeof( int32_t ) );
// Note: The timestamp value t0 might be off by a number of microseconds from the
// true hardware vblank event. The ktime value should be used instead, but it is
// measured in CLOCK_MONOTONIC time. Tracy only supports the timestamp counter
// register (TSC) or CLOCK_MONOTONIC_RAW clock.
#if 0
offset += sizeof( uint32_t ) * 2;
int64_t ktime;
ring.Read( &ktime, offset, sizeof( int64_t ) );
#endif
TracyLfqPrepare( QueueType::FrameVsync );
MemWrite( &item->frameVsync.id, crtc );
MemWrite( &item->frameVsync.time, t0 );
TracyLfqCommit;
}
rbPos += hdr.size;
if( rbPos == end[sel] )
{
memmove( active+selPos, active+selPos+1, sizeof(*active) * ( activeNum - selPos - 1 ) );
activeNum--;
}
else
{
pos[sel] = rbPos;
}
}
}
for( int i=0; i<ctxBufNum; i++ )
{
if( end[i] != 0 ) ringArray[ctxBufferIdx + i].Advance( end[i] );
}
}
}
if( !traceActive.load( std::memory_order_relaxed ) ) break;
if( !hadData )
{
std::this_thread::sleep_for( std::chrono::milliseconds( 1 ) );
}
}
for( int i=0; i<numBuffers; i++ ) ringArray[i].~RingBuffer();
tracy_free_fast( ringArray );
}
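// Resolves external thread and process names from /proc: the thread name comes
// from /proc/<tid>/comm, the owning pid from the Tgid field of
// /proc/<tid>/status, and the process name from /proc/<pid>/comm.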
void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const char*& name )
{
FILE* f;
char fn[256];
sprintf( fn, "/proc/%" PRIu64 "/comm", thread );
f = fopen( fn, "rb" );
if( f )
{
char buf[256];
const auto sz = fread( buf, 1, 256, f );
if( sz > 0 && buf[sz-1] == '\n' ) buf[sz-1] = '\0';
threadName = CopyString( buf );
fclose( f );
}
else
{
threadName = CopyString( "???", 3 );
}
sprintf( fn, "/proc/%" PRIu64 "/status", thread );
f = fopen( fn, "rb" );
if( f )
{
char* tmp = (char*)tracy_malloc_fast( 8*1024 );
const auto fsz = (ptrdiff_t)fread( tmp, 1, 8*1024, f );
fclose( f );
int pid = -1;
auto line = tmp;
for(;;)
{
if( memcmp( "Tgid:\t", line, 6 ) == 0 )
{
pid = atoi( line + 6 );
break;
}
while( line - tmp < fsz && *line != '\n' ) line++;
if( *line != '\n' ) break;
line++;
}
tracy_free_fast( tmp );
if( pid >= 0 )
{
{
uint64_t _pid = pid;
TracyLfqPrepare( QueueType::TidToPid );
MemWrite( &item->tidToPid.tid, thread );
MemWrite( &item->tidToPid.pid, _pid );
TracyLfqCommit;
}
sprintf( fn, "/proc/%i/comm", pid );
f = fopen( fn, "rb" );
if( f )
{
char buf[256];
const auto sz = fread( buf, 1, 256, f );
if( sz > 0 && buf[sz-1] == '\n' ) buf[sz-1] = '\0';
name = CopyStringFast( buf );
fclose( f );
return;
}
}
}
name = CopyStringFast( "???", 3 );
}
}
# endif
#endif