#include "TracyDebug.hpp" #include "TracyStringHelpers.hpp" #include "TracySysTrace.hpp" #include "../common/TracySystem.hpp" #ifdef TRACY_HAS_SYSTEM_TRACING #ifndef TRACY_SAMPLING_HZ # if defined _WIN32 # define TRACY_SAMPLING_HZ 8000 # elif defined __linux__ # define TRACY_SAMPLING_HZ 10000 # endif #endif namespace tracy { static constexpr int GetSamplingFrequency() { #if defined _WIN32 return TRACY_SAMPLING_HZ > 8000 ? 8000 : ( TRACY_SAMPLING_HZ < 1 ? 1 : TRACY_SAMPLING_HZ ); #else return TRACY_SAMPLING_HZ > 1000000 ? 1000000 : ( TRACY_SAMPLING_HZ < 1 ? 1 : TRACY_SAMPLING_HZ ); #endif } static constexpr int GetSamplingPeriod() { return 1000000000 / GetSamplingFrequency(); } } # if defined _WIN32 # ifndef NOMINMAX # define NOMINMAX # endif # define INITGUID # include # include # include # include # include # include # include # include # include "../common/TracyAlloc.hpp" # include "../common/TracySystem.hpp" # include "TracyProfiler.hpp" # include "TracyThread.hpp" namespace tracy { static const GUID PerfInfoGuid = { 0xce1dbfb4, 0x137e, 0x4da6, { 0x87, 0xb0, 0x3f, 0x59, 0xaa, 0x10, 0x2c, 0xbc } }; static const GUID DxgKrnlGuid = { 0x802ec45a, 0x1e99, 0x4b83, { 0x99, 0x20, 0x87, 0xc9, 0x82, 0x77, 0xba, 0x9d } }; static const GUID ThreadV2Guid = { 0x3d6fa8d1, 0xfe05, 0x11d0, { 0x9d, 0xda, 0x00, 0xc0, 0x4f, 0xd7, 0xba, 0x7c } }; static TRACEHANDLE s_traceHandle; static TRACEHANDLE s_traceHandle2; static EVENT_TRACE_PROPERTIES* s_prop; static DWORD s_pid; static EVENT_TRACE_PROPERTIES* s_propVsync; static TRACEHANDLE s_traceHandleVsync; static TRACEHANDLE s_traceHandleVsync2; Thread* s_threadVsync = nullptr; struct CSwitch { uint32_t newThreadId; uint32_t oldThreadId; int8_t newThreadPriority; int8_t oldThreadPriority; uint8_t previousCState; int8_t spareByte; int8_t oldThreadWaitReason; int8_t oldThreadWaitMode; int8_t oldThreadState; int8_t oldThreadWaitIdealProcessor; uint32_t newThreadWaitTime; uint32_t reserved; }; struct ReadyThread { uint32_t threadId; int8_t adjustReason; int8_t adjustIncrement; int8_t flag; int8_t reserverd; }; struct ThreadTrace { uint32_t processId; uint32_t threadId; uint32_t stackBase; uint32_t stackLimit; uint32_t userStackBase; uint32_t userStackLimit; uint32_t startAddr; uint32_t win32StartAddr; uint32_t tebBase; uint32_t subProcessTag; }; struct StackWalkEvent { uint64_t eventTimeStamp; uint32_t stackProcess; uint32_t stackThread; uint64_t stack[192]; }; struct VSyncInfo { void* dxgAdapter; uint32_t vidPnTargetId; uint64_t scannedPhysicalAddress; uint32_t vidPnSourceId; uint32_t frameNumber; int64_t frameQpcTime; void* hFlipDevice; uint32_t flipType; uint64_t flipFenceId; }; extern "C" typedef NTSTATUS (WINAPI *t_NtQueryInformationThread)( HANDLE, THREADINFOCLASS, PVOID, ULONG, PULONG ); extern "C" typedef BOOL (WINAPI *t_EnumProcessModules)( HANDLE, HMODULE*, DWORD, LPDWORD ); extern "C" typedef BOOL (WINAPI *t_GetModuleInformation)( HANDLE, HMODULE, LPMODULEINFO, DWORD ); extern "C" typedef DWORD (WINAPI *t_GetModuleBaseNameA)( HANDLE, HMODULE, LPSTR, DWORD ); extern "C" typedef HRESULT (WINAPI *t_GetThreadDescription)( HANDLE, PWSTR* ); t_NtQueryInformationThread NtQueryInformationThread = (t_NtQueryInformationThread)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "NtQueryInformationThread" ); t_EnumProcessModules _EnumProcessModules = (t_EnumProcessModules)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32EnumProcessModules" ); t_GetModuleInformation _GetModuleInformation = (t_GetModuleInformation)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32GetModuleInformation" ); t_GetModuleBaseNameA _GetModuleBaseNameA = (t_GetModuleBaseNameA)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32GetModuleBaseNameA" ); static t_GetThreadDescription _GetThreadDescription = 0; void WINAPI EventRecordCallback( PEVENT_RECORD record ) { #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif const auto& hdr = record->EventHeader; switch( hdr.ProviderId.Data1 ) { case 0x3d6fa8d1: // Thread Guid if( hdr.EventDescriptor.Opcode == 36 ) { const auto cswitch = (const CSwitch*)record->UserData; TracyLfqPrepare( QueueType::ContextSwitch ); MemWrite( &item->contextSwitch.time, hdr.TimeStamp.QuadPart ); MemWrite( &item->contextSwitch.oldThread, cswitch->oldThreadId ); MemWrite( &item->contextSwitch.newThread, cswitch->newThreadId ); MemWrite( &item->contextSwitch.cpu, record->BufferContext.ProcessorNumber ); MemWrite( &item->contextSwitch.reason, cswitch->oldThreadWaitReason ); MemWrite( &item->contextSwitch.state, cswitch->oldThreadState ); TracyLfqCommit; } else if( hdr.EventDescriptor.Opcode == 50 ) { const auto rt = (const ReadyThread*)record->UserData; TracyLfqPrepare( QueueType::ThreadWakeup ); MemWrite( &item->threadWakeup.time, hdr.TimeStamp.QuadPart ); MemWrite( &item->threadWakeup.thread, rt->threadId ); TracyLfqCommit; } else if( hdr.EventDescriptor.Opcode == 1 || hdr.EventDescriptor.Opcode == 3 ) { const auto tt = (const ThreadTrace*)record->UserData; uint64_t tid = tt->threadId; if( tid == 0 ) return; uint64_t pid = tt->processId; TracyLfqPrepare( QueueType::TidToPid ); MemWrite( &item->tidToPid.tid, tid ); MemWrite( &item->tidToPid.pid, pid ); TracyLfqCommit; } break; case 0xdef2fe46: // StackWalk Guid if( hdr.EventDescriptor.Opcode == 32 ) { const auto sw = (const StackWalkEvent*)record->UserData; if( sw->stackProcess == s_pid ) { const uint64_t sz = ( record->UserDataLength - 16 ) / 8; if( sz > 0 ) { auto trace = (uint64_t*)tracy_malloc( ( 1 + sz ) * sizeof( uint64_t ) ); memcpy( trace, &sz, sizeof( uint64_t ) ); memcpy( trace+1, sw->stack, sizeof( uint64_t ) * sz ); TracyLfqPrepare( QueueType::CallstackSample ); MemWrite( &item->callstackSampleFat.time, sw->eventTimeStamp ); MemWrite( &item->callstackSampleFat.thread, sw->stackThread ); MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace ); TracyLfqCommit; } } } break; default: break; } } static constexpr const char* VsyncName[] = { "[0] Vsync", "[1] Vsync", "[2] Vsync", "[3] Vsync", "[4] Vsync", "[5] Vsync", "[6] Vsync", "[7] Vsync", "Vsync" }; static uint32_t VsyncTarget[8] = {}; void WINAPI EventRecordCallbackVsync( PEVENT_RECORD record ) { #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif const auto& hdr = record->EventHeader; assert( hdr.ProviderId.Data1 == 0x802EC45A ); assert( hdr.EventDescriptor.Id == 0x0011 ); const auto vs = (const VSyncInfo*)record->UserData; int idx = 0; do { if( VsyncTarget[idx] == 0 ) { VsyncTarget[idx] = vs->vidPnTargetId; break; } else if( VsyncTarget[idx] == vs->vidPnTargetId ) { break; } } while( ++idx < 8 ); TracyLfqPrepare( QueueType::FrameMarkMsg ); MemWrite( &item->frameMark.time, hdr.TimeStamp.QuadPart ); MemWrite( &item->frameMark.name, uint64_t( VsyncName[idx] ) ); TracyLfqCommit; } static void SetupVsync() { #if _WIN32_WINNT >= _WIN32_WINNT_WINBLUE const auto psz = sizeof( EVENT_TRACE_PROPERTIES ) + MAX_PATH; s_propVsync = (EVENT_TRACE_PROPERTIES*)tracy_malloc( psz ); memset( s_propVsync, 0, sizeof( EVENT_TRACE_PROPERTIES ) ); s_propVsync->LogFileMode = EVENT_TRACE_REAL_TIME_MODE; s_propVsync->Wnode.BufferSize = psz; #ifdef TRACY_TIMER_QPC s_propVsync->Wnode.ClientContext = 1; #else s_propVsync->Wnode.ClientContext = 3; #endif s_propVsync->LoggerNameOffset = sizeof( EVENT_TRACE_PROPERTIES ); strcpy( ((char*)s_propVsync) + sizeof( EVENT_TRACE_PROPERTIES ), "TracyVsync" ); auto backup = tracy_malloc( psz ); memcpy( backup, s_propVsync, psz ); const auto controlStatus = ControlTraceA( 0, "TracyVsync", s_propVsync, EVENT_TRACE_CONTROL_STOP ); if( controlStatus != ERROR_SUCCESS && controlStatus != ERROR_WMI_INSTANCE_NOT_FOUND ) { tracy_free( backup ); tracy_free( s_propVsync ); return; } memcpy( s_propVsync, backup, psz ); tracy_free( backup ); const auto startStatus = StartTraceA( &s_traceHandleVsync, "TracyVsync", s_propVsync ); if( startStatus != ERROR_SUCCESS ) { tracy_free( s_propVsync ); return; } EVENT_FILTER_EVENT_ID fe = {}; fe.FilterIn = TRUE; fe.Count = 1; fe.Events[0] = 0x0011; // VSyncDPC_Info EVENT_FILTER_DESCRIPTOR desc = {}; desc.Ptr = (ULONGLONG)&fe; desc.Size = sizeof( fe ); desc.Type = EVENT_FILTER_TYPE_EVENT_ID; ENABLE_TRACE_PARAMETERS params = {}; params.Version = ENABLE_TRACE_PARAMETERS_VERSION_2; params.EnableProperty = EVENT_ENABLE_PROPERTY_IGNORE_KEYWORD_0; params.SourceId = s_propVsync->Wnode.Guid; params.EnableFilterDesc = &desc; params.FilterDescCount = 1; uint64_t mask = 0x4000000000000001; // Microsoft_Windows_DxgKrnl_Performance | Base if( EnableTraceEx2( s_traceHandleVsync, &DxgKrnlGuid, EVENT_CONTROL_CODE_ENABLE_PROVIDER, TRACE_LEVEL_INFORMATION, mask, mask, 0, ¶ms ) != ERROR_SUCCESS ) { tracy_free( s_propVsync ); return; } char loggerName[MAX_PATH]; strcpy( loggerName, "TracyVsync" ); EVENT_TRACE_LOGFILEA log = {}; log.LoggerName = loggerName; log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_EVENT_RECORD | PROCESS_TRACE_MODE_RAW_TIMESTAMP; log.EventRecordCallback = EventRecordCallbackVsync; s_traceHandleVsync2 = OpenTraceA( &log ); if( s_traceHandleVsync2 == (TRACEHANDLE)INVALID_HANDLE_VALUE ) { CloseTrace( s_traceHandleVsync ); tracy_free( s_propVsync ); return; } s_threadVsync = (Thread*)tracy_malloc( sizeof( Thread ) ); new(s_threadVsync) Thread( [] (void*) { ThreadExitHandler threadExitHandler; SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL ); SetThreadName( "Tracy Vsync" ); ProcessTrace( &s_traceHandleVsync2, 1, nullptr, nullptr ); }, nullptr ); #endif } static constexpr int GetSamplingInterval() { return GetSamplingPeriod() / 100; } bool SysTraceStart( int64_t& samplingPeriod ) { if( !_GetThreadDescription ) _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" ); s_pid = GetCurrentProcessId(); #if defined _WIN64 constexpr bool isOs64Bit = true; #else BOOL _iswow64; IsWow64Process( GetCurrentProcess(), &_iswow64 ); const bool isOs64Bit = _iswow64; #endif TOKEN_PRIVILEGES priv = {}; priv.PrivilegeCount = 1; priv.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; if( LookupPrivilegeValue( nullptr, SE_SYSTEM_PROFILE_NAME, &priv.Privileges[0].Luid ) == 0 ) return false; HANDLE pt; if( OpenProcessToken( GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES, &pt ) == 0 ) return false; const auto adjust = AdjustTokenPrivileges( pt, FALSE, &priv, 0, nullptr, nullptr ); CloseHandle( pt ); if( adjust == 0 ) return false; const auto status = GetLastError(); if( status != ERROR_SUCCESS ) return false; if( isOs64Bit ) { TRACE_PROFILE_INTERVAL interval = {}; interval.Interval = GetSamplingInterval(); const auto intervalStatus = TraceSetInformation( 0, TraceSampledProfileIntervalInfo, &interval, sizeof( interval ) ); if( intervalStatus != ERROR_SUCCESS ) return false; samplingPeriod = GetSamplingPeriod(); } const auto psz = sizeof( EVENT_TRACE_PROPERTIES ) + sizeof( KERNEL_LOGGER_NAME ); s_prop = (EVENT_TRACE_PROPERTIES*)tracy_malloc( psz ); memset( s_prop, 0, sizeof( EVENT_TRACE_PROPERTIES ) ); ULONG flags = 0; #ifndef TRACY_NO_CONTEXT_SWITCH flags = EVENT_TRACE_FLAG_CSWITCH | EVENT_TRACE_FLAG_DISPATCHER | EVENT_TRACE_FLAG_THREAD; #endif #ifndef TRACY_NO_SAMPLING if( isOs64Bit ) flags |= EVENT_TRACE_FLAG_PROFILE; #endif s_prop->EnableFlags = flags; s_prop->LogFileMode = EVENT_TRACE_REAL_TIME_MODE; s_prop->Wnode.BufferSize = psz; s_prop->Wnode.Flags = WNODE_FLAG_TRACED_GUID; #ifdef TRACY_TIMER_QPC s_prop->Wnode.ClientContext = 1; #else s_prop->Wnode.ClientContext = 3; #endif s_prop->Wnode.Guid = SystemTraceControlGuid; s_prop->BufferSize = 1024; s_prop->MinimumBuffers = std::thread::hardware_concurrency() * 4; s_prop->MaximumBuffers = std::thread::hardware_concurrency() * 6; s_prop->LoggerNameOffset = sizeof( EVENT_TRACE_PROPERTIES ); memcpy( ((char*)s_prop) + sizeof( EVENT_TRACE_PROPERTIES ), KERNEL_LOGGER_NAME, sizeof( KERNEL_LOGGER_NAME ) ); auto backup = tracy_malloc( psz ); memcpy( backup, s_prop, psz ); const auto controlStatus = ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP ); if( controlStatus != ERROR_SUCCESS && controlStatus != ERROR_WMI_INSTANCE_NOT_FOUND ) { tracy_free( backup ); tracy_free( s_prop ); return false; } memcpy( s_prop, backup, psz ); tracy_free( backup ); const auto startStatus = StartTrace( &s_traceHandle, KERNEL_LOGGER_NAME, s_prop ); if( startStatus != ERROR_SUCCESS ) { tracy_free( s_prop ); return false; } if( isOs64Bit ) { CLASSIC_EVENT_ID stackId[2] = {}; stackId[0].EventGuid = PerfInfoGuid; stackId[0].Type = 46; stackId[1].EventGuid = ThreadV2Guid; stackId[1].Type = 36; const auto stackStatus = TraceSetInformation( s_traceHandle, TraceStackTracingInfo, &stackId, sizeof( stackId ) ); if( stackStatus != ERROR_SUCCESS ) { tracy_free( s_prop ); return false; } } #ifdef UNICODE WCHAR KernelLoggerName[sizeof( KERNEL_LOGGER_NAME )]; #else char KernelLoggerName[sizeof( KERNEL_LOGGER_NAME )]; #endif memcpy( KernelLoggerName, KERNEL_LOGGER_NAME, sizeof( KERNEL_LOGGER_NAME ) ); EVENT_TRACE_LOGFILE log = {}; log.LoggerName = KernelLoggerName; log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_EVENT_RECORD | PROCESS_TRACE_MODE_RAW_TIMESTAMP; log.EventRecordCallback = EventRecordCallback; s_traceHandle2 = OpenTrace( &log ); if( s_traceHandle2 == (TRACEHANDLE)INVALID_HANDLE_VALUE ) { CloseTrace( s_traceHandle ); tracy_free( s_prop ); return false; } #ifndef TRACY_NO_VSYNC_CAPTURE SetupVsync(); #endif return true; } void SysTraceStop() { if( s_threadVsync ) { CloseTrace( s_traceHandleVsync2 ); CloseTrace( s_traceHandleVsync ); s_threadVsync->~Thread(); tracy_free( s_threadVsync ); } CloseTrace( s_traceHandle2 ); CloseTrace( s_traceHandle ); } void SysTraceWorker( void* ptr ) { ThreadExitHandler threadExitHandler; SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL ); SetThreadName( "Tracy SysTrace" ); ProcessTrace( &s_traceHandle2, 1, 0, 0 ); ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP ); tracy_free( s_prop ); } void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const char*& name ) { bool threadSent = false; auto hnd = OpenThread( THREAD_QUERY_INFORMATION, FALSE, DWORD( thread ) ); if( hnd == 0 ) { hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, DWORD( thread ) ); } if( hnd != 0 ) { if( _GetThreadDescription ) { PWSTR tmp; _GetThreadDescription( hnd, &tmp ); char buf[256]; if( tmp ) { auto ret = wcstombs( buf, tmp, 256 ); if( ret != 0 ) { threadName = CopyString( buf, ret ); threadSent = true; } } } const auto pid = GetProcessIdOfThread( hnd ); if( !threadSent && NtQueryInformationThread && _EnumProcessModules && _GetModuleInformation && _GetModuleBaseNameA ) { void* ptr; ULONG retlen; auto status = NtQueryInformationThread( hnd, (THREADINFOCLASS)9 /*ThreadQuerySetWin32StartAddress*/, &ptr, sizeof( &ptr ), &retlen ); if( status == 0 ) { const auto phnd = OpenProcess( PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid ); if( phnd != INVALID_HANDLE_VALUE ) { HMODULE modules[1024]; DWORD needed; if( _EnumProcessModules( phnd, modules, 1024 * sizeof( HMODULE ), &needed ) != 0 ) { const auto sz = std::min( DWORD( needed / sizeof( HMODULE ) ), DWORD( 1024 ) ); for( DWORD i=0; i= (uint64_t)info.lpBaseOfDll && (uint64_t)ptr <= (uint64_t)info.lpBaseOfDll + (uint64_t)info.SizeOfImage ) { char buf2[1024]; const auto modlen = _GetModuleBaseNameA( phnd, modules[i], buf2, 1024 ); if( modlen != 0 ) { threadName = CopyString( buf2, modlen ); threadSent = true; } } } } } CloseHandle( phnd ); } } } CloseHandle( hnd ); if( !threadSent ) { threadName = CopyString( "???", 3 ); threadSent = true; } if( pid != 0 ) { { uint64_t _pid = pid; TracyLfqPrepare( QueueType::TidToPid ); MemWrite( &item->tidToPid.tid, thread ); MemWrite( &item->tidToPid.pid, _pid ); TracyLfqCommit; } if( pid == 4 ) { name = CopyStringFast( "System", 6 ); return; } else { const auto phnd = OpenProcess( PROCESS_QUERY_LIMITED_INFORMATION, FALSE, pid ); if( phnd != INVALID_HANDLE_VALUE ) { char buf2[1024]; const auto sz = GetProcessImageFileNameA( phnd, buf2, 1024 ); CloseHandle( phnd ); if( sz != 0 ) { auto ptr = buf2 + sz - 1; while( ptr > buf2 && *ptr != '\\' ) ptr--; if( *ptr == '\\' ) ptr++; name = CopyStringFast( ptr ); return; } } } } } if( !threadSent ) { threadName = CopyString( "???", 3 ); } name = CopyStringFast( "???", 3 ); } } # elif defined __linux__ # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include "TracyProfiler.hpp" # include "TracyRingBuffer.hpp" # include "TracyThread.hpp" namespace tracy { static std::atomic traceActive { false }; static int s_numCpus = 0; static int s_numBuffers = 0; static int s_ctxBufferIdx = 0; static constexpr size_t RingBufSize = 64*1024; static RingBuffer* s_ring = nullptr; static int perf_event_open( struct perf_event_attr* hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags ) { return syscall( __NR_perf_event_open, hw_event, pid, cpu, group_fd, flags ); } enum TraceEventId { EventCallstack, EventCpuCycles, EventInstructionsRetired, EventCacheReference, EventCacheMiss, EventBranchRetired, EventBranchMiss, EventContextSwitch, EventWakeup, }; static void ProbePreciseIp( perf_event_attr& pe, unsigned long long config0, unsigned long long config1, pid_t pid ) { pe.config = config1; pe.precise_ip = 3; while( pe.precise_ip != 0 ) { const int fd = perf_event_open( &pe, pid, 0, -1, PERF_FLAG_FD_CLOEXEC ); if( fd != -1 ) { close( fd ); break; } pe.precise_ip--; } pe.config = config0; while( pe.precise_ip != 0 ) { const int fd = perf_event_open( &pe, pid, 0, -1, PERF_FLAG_FD_CLOEXEC ); if( fd != -1 ) { close( fd ); break; } pe.precise_ip--; } TracyDebug( " Probed precise_ip: %i\n", pe.precise_ip ); } static void ProbePreciseIp( perf_event_attr& pe, pid_t pid ) { pe.precise_ip = 3; while( pe.precise_ip != 0 ) { const int fd = perf_event_open( &pe, pid, 0, -1, PERF_FLAG_FD_CLOEXEC ); if( fd != -1 ) { close( fd ); break; } pe.precise_ip--; } TracyDebug( " Probed precise_ip: %i\n", pe.precise_ip ); } static bool IsGenuineIntel() { #if defined __i386 || defined __x86_64__ uint32_t regs[4] = {}; __get_cpuid( 0, regs, regs+1, regs+2, regs+3 ); char manufacturer[12]; memcpy( manufacturer, regs+1, 4 ); memcpy( manufacturer+4, regs+3, 4 ); memcpy( manufacturer+8, regs+2, 4 ); return memcmp( manufacturer, "GenuineIntel", 12 ) == 0; #else return false; #endif } static const char* ReadFile( const char* path ) { int fd = open( path, O_RDONLY ); if( fd < 0 ) return nullptr; static char tmp[64]; const auto cnt = read( fd, tmp, 63 ); close( fd ); if( cnt < 0 ) return nullptr; tmp[cnt] = '\0'; return tmp; } #ifdef __ANDROID__ static const char* ReadFileElevated( const char* path ) { // Explanation for "su root sh -c": there are 2 flavors of "su" in circulation // on Android. The default Android su has the following syntax to run a command // as root: // su root 'command' // and 'command' is exec'd not passed to a shell, so if shell interpretation is // wanted, one needs to do: // su root sh -c 'command' // Besides that default Android 'su' command, some Android devices use a different // su with a command-line interface closer to the familiar util-linux su found // on Linux distributions. Fortunately, both the util-linux su and the one // in https://github.com/topjohnwu/Magisk seem to be happy with the above // `su root sh -c 'command'` command line syntax. int pipefd[2]; if( pipe( pipefd ) == 0 ) { const auto pid = fork(); if( pid == 0 ) { // child close( pipefd[0] ); dup2( open( "/dev/null", O_WRONLY ), STDERR_FILENO ); if( dup2( pipefd[1], STDOUT_FILENO ) >= 0 ) { close( pipefd[1] ); char tmp[1024]; sprintf( tmp, "cat %s", path ); execlp( "su", "su", "root", "sh", "-c", tmp, (char*)nullptr ); exit( 1 ); } exit( 0 ); } else if( pid > 0 ) { // parent close( pipefd[1] ); static char tmp[64]; const auto sz = read( pipefd[0], tmp, 63 ); close( pipefd[0] ); waitpid( pid, nullptr, 0 ); if( sz <= 0 ) return nullptr; tmp[sz] = '\0'; return tmp; } else { return nullptr; } } else { return nullptr; } } #else static const char* ReadFileElevated( const char* path ) { return ReadFile( path ); } #endif bool SysTraceStart( int64_t& samplingPeriod ) { #ifndef CLOCK_MONOTONIC_RAW return false; #endif int paranoidLevel = 2; const auto paranoidLevelStr = ReadFile( "/proc/sys/kernel/perf_event_paranoid" ); if( !paranoidLevelStr ) return false; paranoidLevel = atoi( paranoidLevelStr ); TracyDebug( "perf_event_paranoid: %i\n", paranoidLevel ); int switchId = -1, wakeupId = -1; const auto switchIdStr = ReadFileElevated( "/sys/kernel/debug/tracing/events/sched/sched_switch/id" ); if( switchIdStr ) switchId = atoi( switchIdStr ); const auto wakeupIdStr = ReadFileElevated( "/sys/kernel/debug/tracing/events/sched/sched_wakeup/id" ); if( wakeupIdStr ) wakeupId = atoi( wakeupIdStr ); TracyDebug( "sched_switch id: %i\nsched_wakeup id: %i\n", switchId, wakeupId ); #ifdef TRACY_NO_SAMPLE_RETIREMENT const bool noRetirement = true; #else const char* noRetirementEnv = GetEnvVar( "TRACY_NO_SAMPLE_RETIREMENT" ); const bool noRetirement = noRetirementEnv && noRetirementEnv[0] == '1'; #endif #ifdef TRACY_NO_SAMPLE_CACHE const bool noCache = true; #else const char* noCacheEnv = GetEnvVar( "TRACY_NO_SAMPLE_CACHE" ); const bool noCache = noCacheEnv && noCacheEnv[0] == '1'; #endif #ifdef TRACY_NO_SAMPLE_BRANCH const bool noBranch = true; #else const char* noBranchEnv = GetEnvVar( "TRACY_NO_SAMPLE_BRANCH" ); const bool noBranch = noBranchEnv && noBranchEnv[0] == '1'; #endif #ifdef TRACY_NO_CONTEXT_SWITCH const bool noCtxSwitch = true; #else const char* noCtxSwitchEnv = GetEnvVar( "TRACY_NO_CONTEXT_SWITCH" ); const bool noCtxSwitch = noCtxSwitchEnv && noCtxSwitchEnv[0] == '1'; #endif samplingPeriod = GetSamplingPeriod(); uint32_t currentPid = (uint32_t)getpid(); const auto maxNumBuffers = s_numCpus * ( 1 + // software sampling 2 + // CPU cycles + instructions retired 2 + // cache reference + miss 2 + // branch retired + miss 2 // context switches + wakeups ); s_numCpus = (int)std::thread::hardware_concurrency(); s_ring = (RingBuffer*)tracy_malloc( sizeof( RingBuffer ) * maxNumBuffers ); s_numBuffers = 0; // software sampling perf_event_attr pe = {}; pe.type = PERF_TYPE_SOFTWARE; pe.size = sizeof( perf_event_attr ); pe.config = PERF_COUNT_SW_CPU_CLOCK; pe.sample_freq = GetSamplingFrequency(); pe.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CALLCHAIN; #if LINUX_VERSION_CODE >= KERNEL_VERSION( 4, 8, 0 ) pe.sample_max_stack = 127; #endif pe.disabled = 1; pe.freq = 1; pe.inherit = 1; #if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) pe.use_clockid = 1; pe.clockid = CLOCK_MONOTONIC_RAW; #endif TracyDebug( "Setup software sampling\n" ); ProbePreciseIp( pe, currentPid ); for( int i=0; i(); tracy_free( s_ring ); return false; } TracyDebug( " No access to kernel samples\n" ); } new( s_ring+s_numBuffers ) RingBuffer( fd, EventCallstack ); s_numBuffers++; TracyDebug( " Core %i ok\n", i ); } // CPU cycles + instructions retired pe = {}; pe.type = PERF_TYPE_HARDWARE; pe.size = sizeof( perf_event_attr ); pe.sample_freq = 5000; pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TIME; pe.disabled = 1; pe.exclude_kernel = 1; pe.exclude_guest = 1; pe.exclude_hv = 1; pe.freq = 1; pe.inherit = 1; #if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) pe.use_clockid = 1; pe.clockid = CLOCK_MONOTONIC_RAW; #endif if( !noRetirement ) { TracyDebug( "Setup sampling cycles + retirement\n" ); ProbePreciseIp( pe, PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS, currentPid ); for( int i=0; i( fd, EventCpuCycles ); s_numBuffers++; TracyDebug( " Core %i ok\n", i ); } } pe.config = PERF_COUNT_HW_INSTRUCTIONS; for( int i=0; i( fd, EventInstructionsRetired ); s_numBuffers++; TracyDebug( " Core %i ok\n", i ); } } } // cache reference + miss if( !noCache ) { TracyDebug( "Setup sampling CPU cache references + misses\n" ); ProbePreciseIp( pe, PERF_COUNT_HW_CACHE_REFERENCES, PERF_COUNT_HW_CACHE_MISSES, currentPid ); if( IsGenuineIntel() ) { pe.precise_ip = 0; TracyDebug( " CPU is GenuineIntel, forcing precise_ip down to 0\n" ); } for( int i=0; i( fd, EventCacheReference ); s_numBuffers++; TracyDebug( " Core %i ok\n", i ); } } pe.config = PERF_COUNT_HW_CACHE_MISSES; for( int i=0; i( fd, EventCacheMiss ); s_numBuffers++; TracyDebug( " Core %i ok\n", i ); } } } // branch retired + miss if( !noBranch ) { TracyDebug( "Setup sampling CPU branch retirements + misses\n" ); ProbePreciseIp( pe, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, PERF_COUNT_HW_BRANCH_MISSES, currentPid ); for( int i=0; i( fd, EventBranchRetired ); s_numBuffers++; TracyDebug( " Core %i ok\n", i ); } } pe.config = PERF_COUNT_HW_BRANCH_MISSES; for( int i=0; i( fd, EventBranchMiss ); s_numBuffers++; TracyDebug( " Core %i ok\n", i ); } } } s_ctxBufferIdx = s_numBuffers; // context switches if( !noCtxSwitch && switchId != -1 ) { pe = {}; pe.type = PERF_TYPE_TRACEPOINT; pe.size = sizeof( perf_event_attr ); pe.sample_period = 1; pe.sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_RAW; pe.disabled = 1; pe.inherit = 1; pe.config = switchId; #if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) pe.use_clockid = 1; pe.clockid = CLOCK_MONOTONIC_RAW; #endif TracyDebug( "Setup context switch capture\n" ); for( int i=0; i( fd, EventContextSwitch, i ); s_numBuffers++; TracyDebug( " Core %i ok\n", i ); } } if( wakeupId != -1 ) { pe.config = wakeupId; TracyDebug( "Setup wakeup capture\n" ); for( int i=0; i( fd, EventWakeup, i ); s_numBuffers++; TracyDebug( " Core %i ok\n", i ); } } } } TracyDebug( "Ringbuffers in use: %i\n", s_numBuffers ); traceActive.store( true, std::memory_order_relaxed ); return true; } void SysTraceStop() { traceActive.store( false, std::memory_order_relaxed ); } void SysTraceWorker( void* ptr ) { ThreadExitHandler threadExitHandler; SetThreadName( "Tracy Sampling" ); InitRpmalloc(); sched_param sp = { 5 }; pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp ); for( int i=0; i tail ); hadData = true; const auto end = head - tail; uint64_t pos = 0; while( pos < end ) { perf_event_header hdr; ring.Read( &hdr, pos, sizeof( perf_event_header ) ); if( hdr.type == PERF_RECORD_SAMPLE ) { auto offset = pos + sizeof( perf_event_header ); const auto id = ring.GetId(); assert( id != EventContextSwitch ); if( id == EventCallstack ) { // Layout: // u32 pid, tid // u64 time // u64 cnt // u64 ip[cnt] uint32_t tid; uint64_t t0; uint64_t cnt; offset += sizeof( uint32_t ); ring.Read( &tid, offset, sizeof( uint32_t ) ); offset += sizeof( uint32_t ); ring.Read( &t0, offset, sizeof( uint64_t ) ); offset += sizeof( uint64_t ); ring.Read( &cnt, offset, sizeof( uint64_t ) ); offset += sizeof( uint64_t ); if( cnt > 0 ) { #if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) t0 = ring.ConvertTimeToTsc( t0 ); #endif auto trace = (uint64_t*)tracy_malloc_fast( ( 1 + cnt ) * sizeof( uint64_t ) ); ring.Read( trace+1, offset, sizeof( uint64_t ) * cnt ); #if defined __x86_64__ || defined _M_X64 // remove non-canonical pointers do { const auto test = (int64_t)trace[cnt]; const auto m1 = test >> 63; const auto m2 = test >> 47; if( m1 == m2 ) break; } while( --cnt > 0 ); for( uint64_t j=1; j> 63; const auto m2 = test >> 47; if( m1 != m2 ) trace[j] = 0; } #endif for( uint64_t j=1; j<=cnt; j++ ) { if( trace[j] >= (uint64_t)-4095 ) // PERF_CONTEXT_MAX { memmove( trace+j, trace+j+1, sizeof( uint64_t ) * ( cnt - j ) ); cnt--; } } memcpy( trace, &cnt, sizeof( uint64_t ) ); TracyLfqPrepare( QueueType::CallstackSample ); MemWrite( &item->callstackSampleFat.time, t0 ); MemWrite( &item->callstackSampleFat.thread, tid ); MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace ); TracyLfqCommit; } } else { // Layout: // u64 ip // u64 time uint64_t ip, t0; ring.Read( &ip, offset, sizeof( uint64_t ) ); offset += sizeof( uint64_t ); ring.Read( &t0, offset, sizeof( uint64_t ) ); #if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) t0 = ring.ConvertTimeToTsc( t0 ); #endif QueueType type; switch( id ) { case EventCpuCycles: type = QueueType::HwSampleCpuCycle; break; case EventInstructionsRetired: type = QueueType::HwSampleInstructionRetired; break; case EventCacheReference: type = QueueType::HwSampleCacheReference; break; case EventCacheMiss: type = QueueType::HwSampleCacheMiss; break; case EventBranchRetired: type = QueueType::HwSampleBranchRetired; break; case EventBranchMiss: type = QueueType::HwSampleBranchMiss; break; default: assert( false ); break; } TracyLfqPrepare( type ); MemWrite( &item->hwSample.ip, ip ); MemWrite( &item->hwSample.time, t0 ); TracyLfqCommit; } } pos += hdr.size; } assert( pos == end ); ring.Advance( end ); } if( !traceActive.load( std::memory_order_relaxed ) ) break; if( s_ctxBufferIdx != s_numBuffers ) { const auto ctxBufNum = s_numBuffers - s_ctxBufferIdx; int activeNum = 0; bool active[512]; uint32_t end[512]; uint32_t pos[512]; for( int i=0; i 0 ) { hadData = true; while( activeNum > 0 ) { int sel = -1; int64_t t0 = std::numeric_limits::max(); for( int i=0; i= 0 || activeNum == 0 ); if( sel >= 0 ) { auto& ring = s_ring[s_ctxBufferIdx + sel]; auto rbPos = pos[sel]; auto offset = rbPos; perf_event_header hdr; ring.Read( &hdr, offset, sizeof( perf_event_header ) ); #if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) t0 = ring.ConvertTimeToTsc( t0 ); #endif if( ring.GetId() == EventContextSwitch ) { // Layout: // u64 time // u32 size // u8 data[size] // Data (not ABI stable, but has not changed since it was added, in 2009): // u8 hdr[8] // u8 prev_comm[16] // u32 prev_pid // u32 prev_prio // u64 prev_state // u8 next_comm[16] // u32 next_pid // u32 next_prio offset += sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ) + 8 + 16; uint32_t prev_pid, next_pid; uint64_t prev_state; ring.Read( &prev_pid, offset, sizeof( uint32_t ) ); offset += sizeof( uint32_t ) + sizeof( uint32_t ); ring.Read( &prev_state, offset, sizeof( uint64_t ) ); offset += sizeof( uint64_t ) + 16; ring.Read( &next_pid, offset, sizeof( uint32_t ) ); uint8_t reason = 100; uint8_t state; if( prev_state & 0x0001 ) state = 104; else if( prev_state & 0x0002 ) state = 101; else if( prev_state & 0x0004 ) state = 105; else if( prev_state & 0x0008 ) state = 106; else if( prev_state & 0x0010 ) state = 108; else if( prev_state & 0x0020 ) state = 109; else if( prev_state & 0x0040 ) state = 110; else if( prev_state & 0x0080 ) state = 102; else state = 103; TracyLfqPrepare( QueueType::ContextSwitch ); MemWrite( &item->contextSwitch.time, t0 ); MemWrite( &item->contextSwitch.oldThread, prev_pid ); MemWrite( &item->contextSwitch.newThread, next_pid ); MemWrite( &item->contextSwitch.cpu, uint8_t( ring.GetCpu() ) ); MemWrite( &item->contextSwitch.reason, reason ); MemWrite( &item->contextSwitch.state, state ); TracyLfqCommit; } else { assert( ring.GetId() == EventWakeup ); // Layout: // u64 time // u32 size // u8 data[size] // Data: // u8 hdr[8] // u8 comm[16] // u32 pid // u32 prio // u64 target_cpu offset += sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ) + 8 + 16; uint32_t pid; ring.Read( &pid, offset, sizeof( uint32_t ) ); TracyLfqPrepare( QueueType::ThreadWakeup ); MemWrite( &item->threadWakeup.time, t0 ); MemWrite( &item->threadWakeup.thread, pid ); TracyLfqCommit; } rbPos += hdr.size; if( rbPos == end[sel] ) { active[sel] = false; activeNum--; } else { pos[sel] = rbPos; } } } for( int i=0; i(); tracy_free_fast( s_ring ); } void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const char*& name ) { FILE* f; char fn[256]; sprintf( fn, "/proc/%" PRIu64 "/comm", thread ); f = fopen( fn, "rb" ); if( f ) { char buf[256]; const auto sz = fread( buf, 1, 256, f ); if( sz > 0 && buf[sz-1] == '\n' ) buf[sz-1] = '\0'; threadName = CopyString( buf ); fclose( f ); } else { threadName = CopyString( "???", 3 ); } sprintf( fn, "/proc/%" PRIu64 "/status", thread ); f = fopen( fn, "rb" ); if( f ) { int pid = -1; size_t lsz = 1024; auto line = (char*)tracy_malloc_fast( lsz ); for(;;) { auto rd = getline( &line, &lsz, f ); if( rd <= 0 ) break; if( memcmp( "Tgid:\t", line, 6 ) == 0 ) { pid = atoi( line + 6 ); break; } } tracy_free_fast( line ); fclose( f ); if( pid >= 0 ) { { uint64_t _pid = pid; TracyLfqPrepare( QueueType::TidToPid ); MemWrite( &item->tidToPid.tid, thread ); MemWrite( &item->tidToPid.pid, _pid ); TracyLfqCommit; } sprintf( fn, "/proc/%i/comm", pid ); f = fopen( fn, "rb" ); if( f ) { char buf[256]; const auto sz = fread( buf, 1, 256, f ); if( sz > 0 && buf[sz-1] == '\n' ) buf[sz-1] = '\0'; name = CopyStringFast( buf ); fclose( f ); return; } } } name = CopyStringFast( "???", 3 ); } } # endif #endif