#include "TracyDebug.hpp" #include "TracyStringHelpers.hpp" #include "TracySysTrace.hpp" #include "../common/TracySystem.hpp" #ifdef TRACY_HAS_SYSTEM_TRACING #ifndef TRACY_SAMPLING_HZ # if defined _WIN32 # define TRACY_SAMPLING_HZ 8000 # elif defined __linux__ # define TRACY_SAMPLING_HZ 10000 # endif #endif namespace tracy { static int GetSamplingFrequency() { int samplingHz = TRACY_SAMPLING_HZ; auto env = GetEnvVar( "TRACY_SAMPLING_HZ" ); if( env ) { int val = atoi( env ); if( val > 0 ) samplingHz = val; } #if defined _WIN32 return samplingHz > 8000 ? 8000 : ( samplingHz < 1 ? 1 : samplingHz ); #else return samplingHz > 1000000 ? 1000000 : ( samplingHz < 1 ? 1 : samplingHz ); #endif } static int GetSamplingPeriod() { return 1000000000 / GetSamplingFrequency(); } } # if defined _WIN32 # ifndef NOMINMAX # define NOMINMAX # endif # define INITGUID # include # include # include # include # include # include # include # include # include "../common/TracyAlloc.hpp" # include "../common/TracySystem.hpp" # include "TracyProfiler.hpp" # include "TracyThread.hpp" namespace tracy { static const GUID PerfInfoGuid = { 0xce1dbfb4, 0x137e, 0x4da6, { 0x87, 0xb0, 0x3f, 0x59, 0xaa, 0x10, 0x2c, 0xbc } }; static const GUID DxgKrnlGuid = { 0x802ec45a, 0x1e99, 0x4b83, { 0x99, 0x20, 0x87, 0xc9, 0x82, 0x77, 0xba, 0x9d } }; static const GUID ThreadV2Guid = { 0x3d6fa8d1, 0xfe05, 0x11d0, { 0x9d, 0xda, 0x00, 0xc0, 0x4f, 0xd7, 0xba, 0x7c } }; static TRACEHANDLE s_traceHandle; static TRACEHANDLE s_traceHandle2; static EVENT_TRACE_PROPERTIES* s_prop; static DWORD s_pid; static EVENT_TRACE_PROPERTIES* s_propVsync; static TRACEHANDLE s_traceHandleVsync; static TRACEHANDLE s_traceHandleVsync2; Thread* s_threadVsync = nullptr; struct CSwitch { uint32_t newThreadId; uint32_t oldThreadId; int8_t newThreadPriority; int8_t oldThreadPriority; uint8_t previousCState; int8_t spareByte; int8_t oldThreadWaitReason; int8_t oldThreadWaitMode; int8_t oldThreadState; int8_t oldThreadWaitIdealProcessor; uint32_t newThreadWaitTime; uint32_t reserved; }; struct ReadyThread { uint32_t threadId; int8_t adjustReason; int8_t adjustIncrement; int8_t flag; int8_t reserverd; }; struct ThreadTrace { uint32_t processId; uint32_t threadId; uint32_t stackBase; uint32_t stackLimit; uint32_t userStackBase; uint32_t userStackLimit; uint32_t startAddr; uint32_t win32StartAddr; uint32_t tebBase; uint32_t subProcessTag; }; struct StackWalkEvent { uint64_t eventTimeStamp; uint32_t stackProcess; uint32_t stackThread; uint64_t stack[192]; }; struct VSyncInfo { void* dxgAdapter; uint32_t vidPnTargetId; uint64_t scannedPhysicalAddress; uint32_t vidPnSourceId; uint32_t frameNumber; int64_t frameQpcTime; void* hFlipDevice; uint32_t flipType; uint64_t flipFenceId; }; extern "C" typedef NTSTATUS (WINAPI *t_NtQueryInformationThread)( HANDLE, THREADINFOCLASS, PVOID, ULONG, PULONG ); extern "C" typedef BOOL (WINAPI *t_EnumProcessModules)( HANDLE, HMODULE*, DWORD, LPDWORD ); extern "C" typedef BOOL (WINAPI *t_GetModuleInformation)( HANDLE, HMODULE, LPMODULEINFO, DWORD ); extern "C" typedef DWORD (WINAPI *t_GetModuleBaseNameA)( HANDLE, HMODULE, LPSTR, DWORD ); extern "C" typedef HRESULT (WINAPI *t_GetThreadDescription)( HANDLE, PWSTR* ); t_NtQueryInformationThread NtQueryInformationThread = (t_NtQueryInformationThread)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "NtQueryInformationThread" ); t_EnumProcessModules _EnumProcessModules = (t_EnumProcessModules)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32EnumProcessModules" ); t_GetModuleInformation _GetModuleInformation = (t_GetModuleInformation)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32GetModuleInformation" ); t_GetModuleBaseNameA _GetModuleBaseNameA = (t_GetModuleBaseNameA)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32GetModuleBaseNameA" ); static t_GetThreadDescription _GetThreadDescription = 0; void WINAPI EventRecordCallback( PEVENT_RECORD record ) { #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif const auto& hdr = record->EventHeader; switch( hdr.ProviderId.Data1 ) { case 0x3d6fa8d1: // Thread Guid if( hdr.EventDescriptor.Opcode == 36 ) { const auto cswitch = (const CSwitch*)record->UserData; TracyLfqPrepare( QueueType::ContextSwitch ); MemWrite( &item->contextSwitch.time, hdr.TimeStamp.QuadPart ); MemWrite( &item->contextSwitch.oldThread, cswitch->oldThreadId ); MemWrite( &item->contextSwitch.newThread, cswitch->newThreadId ); MemWrite( &item->contextSwitch.cpu, record->BufferContext.ProcessorNumber ); MemWrite( &item->contextSwitch.reason, cswitch->oldThreadWaitReason ); MemWrite( &item->contextSwitch.state, cswitch->oldThreadState ); TracyLfqCommit; } else if( hdr.EventDescriptor.Opcode == 50 ) { const auto rt = (const ReadyThread*)record->UserData; TracyLfqPrepare( QueueType::ThreadWakeup ); MemWrite( &item->threadWakeup.time, hdr.TimeStamp.QuadPart ); MemWrite( &item->threadWakeup.thread, rt->threadId ); TracyLfqCommit; } else if( hdr.EventDescriptor.Opcode == 1 || hdr.EventDescriptor.Opcode == 3 ) { const auto tt = (const ThreadTrace*)record->UserData; uint64_t tid = tt->threadId; if( tid == 0 ) return; uint64_t pid = tt->processId; TracyLfqPrepare( QueueType::TidToPid ); MemWrite( &item->tidToPid.tid, tid ); MemWrite( &item->tidToPid.pid, pid ); TracyLfqCommit; } break; case 0xdef2fe46: // StackWalk Guid if( hdr.EventDescriptor.Opcode == 32 ) { const auto sw = (const StackWalkEvent*)record->UserData; if( sw->stackProcess == s_pid ) { const uint64_t sz = ( record->UserDataLength - 16 ) / 8; if( sz > 0 ) { auto trace = (uint64_t*)tracy_malloc( ( 1 + sz ) * sizeof( uint64_t ) ); memcpy( trace, &sz, sizeof( uint64_t ) ); memcpy( trace+1, sw->stack, sizeof( uint64_t ) * sz ); TracyLfqPrepare( QueueType::CallstackSample ); MemWrite( &item->callstackSampleFat.time, sw->eventTimeStamp ); MemWrite( &item->callstackSampleFat.thread, sw->stackThread ); MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace ); TracyLfqCommit; } } } break; default: break; } } void WINAPI EventRecordCallbackVsync( PEVENT_RECORD record ) { #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif const auto& hdr = record->EventHeader; assert( hdr.ProviderId.Data1 == 0x802EC45A ); assert( hdr.EventDescriptor.Id == 0x0011 ); const auto vs = (const VSyncInfo*)record->UserData; TracyLfqPrepare( QueueType::FrameVsync ); MemWrite( &item->frameVsync.time, hdr.TimeStamp.QuadPart ); MemWrite( &item->frameVsync.id, vs->vidPnTargetId ); TracyLfqCommit; } static void SetupVsync() { #if _WIN32_WINNT >= _WIN32_WINNT_WINBLUE && !defined(__MINGW32__) const auto psz = sizeof( EVENT_TRACE_PROPERTIES ) + MAX_PATH; s_propVsync = (EVENT_TRACE_PROPERTIES*)tracy_malloc( psz ); memset( s_propVsync, 0, sizeof( EVENT_TRACE_PROPERTIES ) ); s_propVsync->LogFileMode = EVENT_TRACE_REAL_TIME_MODE; s_propVsync->Wnode.BufferSize = psz; #ifdef TRACY_TIMER_QPC s_propVsync->Wnode.ClientContext = 1; #else s_propVsync->Wnode.ClientContext = 3; #endif s_propVsync->LoggerNameOffset = sizeof( EVENT_TRACE_PROPERTIES ); strcpy( ((char*)s_propVsync) + sizeof( EVENT_TRACE_PROPERTIES ), "TracyVsync" ); auto backup = tracy_malloc( psz ); memcpy( backup, s_propVsync, psz ); const auto controlStatus = ControlTraceA( 0, "TracyVsync", s_propVsync, EVENT_TRACE_CONTROL_STOP ); if( controlStatus != ERROR_SUCCESS && controlStatus != ERROR_WMI_INSTANCE_NOT_FOUND ) { tracy_free( backup ); tracy_free( s_propVsync ); return; } memcpy( s_propVsync, backup, psz ); tracy_free( backup ); const auto startStatus = StartTraceA( &s_traceHandleVsync, "TracyVsync", s_propVsync ); if( startStatus != ERROR_SUCCESS ) { tracy_free( s_propVsync ); return; } EVENT_FILTER_EVENT_ID fe = {}; fe.FilterIn = TRUE; fe.Count = 1; fe.Events[0] = 0x0011; // VSyncDPC_Info EVENT_FILTER_DESCRIPTOR desc = {}; desc.Ptr = (ULONGLONG)&fe; desc.Size = sizeof( fe ); desc.Type = EVENT_FILTER_TYPE_EVENT_ID; ENABLE_TRACE_PARAMETERS params = {}; params.Version = ENABLE_TRACE_PARAMETERS_VERSION_2; params.EnableProperty = EVENT_ENABLE_PROPERTY_IGNORE_KEYWORD_0; params.SourceId = s_propVsync->Wnode.Guid; params.EnableFilterDesc = &desc; params.FilterDescCount = 1; uint64_t mask = 0x4000000000000001; // Microsoft_Windows_DxgKrnl_Performance | Base if( EnableTraceEx2( s_traceHandleVsync, &DxgKrnlGuid, EVENT_CONTROL_CODE_ENABLE_PROVIDER, TRACE_LEVEL_INFORMATION, mask, mask, 0, ¶ms ) != ERROR_SUCCESS ) { tracy_free( s_propVsync ); return; } char loggerName[MAX_PATH]; strcpy( loggerName, "TracyVsync" ); EVENT_TRACE_LOGFILEA log = {}; log.LoggerName = loggerName; log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_EVENT_RECORD | PROCESS_TRACE_MODE_RAW_TIMESTAMP; log.EventRecordCallback = EventRecordCallbackVsync; s_traceHandleVsync2 = OpenTraceA( &log ); if( s_traceHandleVsync2 == (TRACEHANDLE)INVALID_HANDLE_VALUE ) { CloseTrace( s_traceHandleVsync ); tracy_free( s_propVsync ); return; } s_threadVsync = (Thread*)tracy_malloc( sizeof( Thread ) ); new(s_threadVsync) Thread( [] (void*) { ThreadExitHandler threadExitHandler; SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL ); SetThreadName( "Tracy Vsync" ); ProcessTrace( &s_traceHandleVsync2, 1, nullptr, nullptr ); }, nullptr ); #endif } static constexpr int GetSamplingInterval() { return GetSamplingPeriod() / 100; } bool SysTraceStart( int64_t& samplingPeriod ) { if( !_GetThreadDescription ) _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" ); s_pid = GetCurrentProcessId(); #if defined _WIN64 constexpr bool isOs64Bit = true; #else BOOL _iswow64; IsWow64Process( GetCurrentProcess(), &_iswow64 ); const bool isOs64Bit = _iswow64; #endif TOKEN_PRIVILEGES priv = {}; priv.PrivilegeCount = 1; priv.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; if( LookupPrivilegeValue( nullptr, SE_SYSTEM_PROFILE_NAME, &priv.Privileges[0].Luid ) == 0 ) return false; HANDLE pt; if( OpenProcessToken( GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES, &pt ) == 0 ) return false; const auto adjust = AdjustTokenPrivileges( pt, FALSE, &priv, 0, nullptr, nullptr ); CloseHandle( pt ); if( adjust == 0 ) return false; const auto status = GetLastError(); if( status != ERROR_SUCCESS ) return false; if( isOs64Bit ) { TRACE_PROFILE_INTERVAL interval = {}; interval.Interval = GetSamplingInterval(); const auto intervalStatus = TraceSetInformation( 0, TraceSampledProfileIntervalInfo, &interval, sizeof( interval ) ); if( intervalStatus != ERROR_SUCCESS ) return false; samplingPeriod = GetSamplingPeriod(); } const auto psz = sizeof( EVENT_TRACE_PROPERTIES ) + sizeof( KERNEL_LOGGER_NAME ); s_prop = (EVENT_TRACE_PROPERTIES*)tracy_malloc( psz ); memset( s_prop, 0, sizeof( EVENT_TRACE_PROPERTIES ) ); ULONG flags = 0; #ifndef TRACY_NO_CONTEXT_SWITCH flags = EVENT_TRACE_FLAG_CSWITCH | EVENT_TRACE_FLAG_DISPATCHER | EVENT_TRACE_FLAG_THREAD; #endif #ifndef TRACY_NO_SAMPLING if( isOs64Bit ) flags |= EVENT_TRACE_FLAG_PROFILE; #endif s_prop->EnableFlags = flags; s_prop->LogFileMode = EVENT_TRACE_REAL_TIME_MODE; s_prop->Wnode.BufferSize = psz; s_prop->Wnode.Flags = WNODE_FLAG_TRACED_GUID; #ifdef TRACY_TIMER_QPC s_prop->Wnode.ClientContext = 1; #else s_prop->Wnode.ClientContext = 3; #endif s_prop->Wnode.Guid = SystemTraceControlGuid; s_prop->BufferSize = 1024; s_prop->MinimumBuffers = std::thread::hardware_concurrency() * 4; s_prop->MaximumBuffers = std::thread::hardware_concurrency() * 6; s_prop->LoggerNameOffset = sizeof( EVENT_TRACE_PROPERTIES ); memcpy( ((char*)s_prop) + sizeof( EVENT_TRACE_PROPERTIES ), KERNEL_LOGGER_NAME, sizeof( KERNEL_LOGGER_NAME ) ); auto backup = tracy_malloc( psz ); memcpy( backup, s_prop, psz ); const auto controlStatus = ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP ); if( controlStatus != ERROR_SUCCESS && controlStatus != ERROR_WMI_INSTANCE_NOT_FOUND ) { tracy_free( backup ); tracy_free( s_prop ); return false; } memcpy( s_prop, backup, psz ); tracy_free( backup ); const auto startStatus = StartTrace( &s_traceHandle, KERNEL_LOGGER_NAME, s_prop ); if( startStatus != ERROR_SUCCESS ) { tracy_free( s_prop ); return false; } #ifndef TRACY_NO_SAMPLING if( isOs64Bit ) { CLASSIC_EVENT_ID stackId[2] = {}; stackId[0].EventGuid = PerfInfoGuid; stackId[0].Type = 46; stackId[1].EventGuid = ThreadV2Guid; stackId[1].Type = 36; const auto stackStatus = TraceSetInformation( s_traceHandle, TraceStackTracingInfo, &stackId, sizeof( stackId ) ); if( stackStatus != ERROR_SUCCESS ) { tracy_free( s_prop ); return false; } } #endif #ifdef UNICODE WCHAR KernelLoggerName[sizeof( KERNEL_LOGGER_NAME )]; #else char KernelLoggerName[sizeof( KERNEL_LOGGER_NAME )]; #endif memcpy( KernelLoggerName, KERNEL_LOGGER_NAME, sizeof( KERNEL_LOGGER_NAME ) ); EVENT_TRACE_LOGFILE log = {}; log.LoggerName = KernelLoggerName; log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_EVENT_RECORD | PROCESS_TRACE_MODE_RAW_TIMESTAMP; log.EventRecordCallback = EventRecordCallback; s_traceHandle2 = OpenTrace( &log ); if( s_traceHandle2 == (TRACEHANDLE)INVALID_HANDLE_VALUE ) { CloseTrace( s_traceHandle ); tracy_free( s_prop ); return false; } #ifndef TRACY_NO_VSYNC_CAPTURE SetupVsync(); #endif return true; } void SysTraceStop() { if( s_threadVsync ) { CloseTrace( s_traceHandleVsync2 ); CloseTrace( s_traceHandleVsync ); s_threadVsync->~Thread(); tracy_free( s_threadVsync ); } CloseTrace( s_traceHandle2 ); CloseTrace( s_traceHandle ); } void SysTraceWorker( void* ptr ) { ThreadExitHandler threadExitHandler; SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL ); SetThreadName( "Tracy SysTrace" ); ProcessTrace( &s_traceHandle2, 1, 0, 0 ); ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP ); tracy_free( s_prop ); } void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const char*& name ) { bool threadSent = false; auto hnd = OpenThread( THREAD_QUERY_INFORMATION, FALSE, DWORD( thread ) ); if( hnd == 0 ) { hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, DWORD( thread ) ); } if( hnd != 0 ) { if( _GetThreadDescription ) { PWSTR tmp; _GetThreadDescription( hnd, &tmp ); char buf[256]; if( tmp ) { auto ret = wcstombs( buf, tmp, 256 ); if( ret != 0 ) { threadName = CopyString( buf, ret ); threadSent = true; } } } const auto pid = GetProcessIdOfThread( hnd ); if( !threadSent && NtQueryInformationThread && _EnumProcessModules && _GetModuleInformation && _GetModuleBaseNameA ) { void* ptr; ULONG retlen; auto status = NtQueryInformationThread( hnd, (THREADINFOCLASS)9 /*ThreadQuerySetWin32StartAddress*/, &ptr, sizeof( &ptr ), &retlen ); if( status == 0 ) { const auto phnd = OpenProcess( PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid ); if( phnd != INVALID_HANDLE_VALUE ) { HMODULE modules[1024]; DWORD needed; if( _EnumProcessModules( phnd, modules, 1024 * sizeof( HMODULE ), &needed ) != 0 ) { const auto sz = std::min( DWORD( needed / sizeof( HMODULE ) ), DWORD( 1024 ) ); for( DWORD i=0; i= (uint64_t)info.lpBaseOfDll && (uint64_t)ptr <= (uint64_t)info.lpBaseOfDll + (uint64_t)info.SizeOfImage ) { char buf2[1024]; const auto modlen = _GetModuleBaseNameA( phnd, modules[i], buf2, 1024 ); if( modlen != 0 ) { threadName = CopyString( buf2, modlen ); threadSent = true; } } } } } CloseHandle( phnd ); } } } CloseHandle( hnd ); if( !threadSent ) { threadName = CopyString( "???", 3 ); threadSent = true; } if( pid != 0 ) { { uint64_t _pid = pid; TracyLfqPrepare( QueueType::TidToPid ); MemWrite( &item->tidToPid.tid, thread ); MemWrite( &item->tidToPid.pid, _pid ); TracyLfqCommit; } if( pid == 4 ) { name = CopyStringFast( "System", 6 ); return; } else { const auto phnd = OpenProcess( PROCESS_QUERY_LIMITED_INFORMATION, FALSE, pid ); if( phnd != INVALID_HANDLE_VALUE ) { char buf2[1024]; const auto sz = GetProcessImageFileNameA( phnd, buf2, 1024 ); CloseHandle( phnd ); if( sz != 0 ) { auto ptr = buf2 + sz - 1; while( ptr > buf2 && *ptr != '\\' ) ptr--; if( *ptr == '\\' ) ptr++; name = CopyStringFast( ptr ); return; } } } } } if( !threadSent ) { threadName = CopyString( "???", 3 ); } name = CopyStringFast( "???", 3 ); } } # elif defined __linux__ # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # if defined __i386 || defined __x86_64__ # include "TracyCpuid.hpp" # endif # include "TracyProfiler.hpp" # include "TracyRingBuffer.hpp" # include "TracyThread.hpp" namespace tracy { static std::atomic traceActive { false }; static int s_numCpus = 0; static int s_numBuffers = 0; static int s_ctxBufferIdx = 0; static RingBuffer* s_ring = nullptr; static const int ThreadHashSize = 4 * 1024; static uint32_t s_threadHash[ThreadHashSize] = {}; static bool CurrentProcOwnsThread( uint32_t tid ) { const auto hash = tid & ( ThreadHashSize-1 ); const auto hv = s_threadHash[hash]; if( hv == tid ) return true; if( hv == -tid ) return false; char path[256]; sprintf( path, "/proc/self/task/%d", tid ); struct stat st; if( stat( path, &st ) == 0 ) { s_threadHash[hash] = tid; return true; } else { s_threadHash[hash] = -tid; return false; } } static int perf_event_open( struct perf_event_attr* hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags ) { return syscall( __NR_perf_event_open, hw_event, pid, cpu, group_fd, flags ); } enum TraceEventId { EventCallstack, EventCpuCycles, EventInstructionsRetired, EventCacheReference, EventCacheMiss, EventBranchRetired, EventBranchMiss, EventVsync, EventContextSwitch, EventWakeup, }; static void ProbePreciseIp( perf_event_attr& pe, unsigned long long config0, unsigned long long config1, pid_t pid ) { pe.config = config1; pe.precise_ip = 3; while( pe.precise_ip != 0 ) { const int fd = perf_event_open( &pe, pid, 0, -1, PERF_FLAG_FD_CLOEXEC ); if( fd != -1 ) { close( fd ); break; } pe.precise_ip--; } pe.config = config0; while( pe.precise_ip != 0 ) { const int fd = perf_event_open( &pe, pid, 0, -1, PERF_FLAG_FD_CLOEXEC ); if( fd != -1 ) { close( fd ); break; } pe.precise_ip--; } TracyDebug( " Probed precise_ip: %i\n", pe.precise_ip ); } static void ProbePreciseIp( perf_event_attr& pe, pid_t pid ) { pe.precise_ip = 3; while( pe.precise_ip != 0 ) { const int fd = perf_event_open( &pe, pid, 0, -1, PERF_FLAG_FD_CLOEXEC ); if( fd != -1 ) { close( fd ); break; } pe.precise_ip--; } TracyDebug( " Probed precise_ip: %i\n", pe.precise_ip ); } static bool IsGenuineIntel() { #if defined __i386 || defined __x86_64__ uint32_t regs[4] = {}; __get_cpuid( 0, regs, regs+1, regs+2, regs+3 ); char manufacturer[12]; memcpy( manufacturer, regs+1, 4 ); memcpy( manufacturer+4, regs+3, 4 ); memcpy( manufacturer+8, regs+2, 4 ); return memcmp( manufacturer, "GenuineIntel", 12 ) == 0; #else return false; #endif } static const char* ReadFile( const char* path ) { int fd = open( path, O_RDONLY ); if( fd < 0 ) return nullptr; static char tmp[64]; const auto cnt = read( fd, tmp, 63 ); close( fd ); if( cnt < 0 ) return nullptr; tmp[cnt] = '\0'; return tmp; } bool SysTraceStart( int64_t& samplingPeriod ) { #ifndef CLOCK_MONOTONIC_RAW return false; #endif const auto paranoidLevelStr = ReadFile( "/proc/sys/kernel/perf_event_paranoid" ); if( !paranoidLevelStr ) return false; #ifdef TRACY_VERBOSE int paranoidLevel = 2; paranoidLevel = atoi( paranoidLevelStr ); TracyDebug( "perf_event_paranoid: %i\n", paranoidLevel ); #endif int switchId = -1, wakeupId = -1, vsyncId = -1; const auto switchIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_switch/id" ); if( switchIdStr ) switchId = atoi( switchIdStr ); const auto wakeupIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_wakeup/id" ); if( wakeupIdStr ) wakeupId = atoi( wakeupIdStr ); const auto vsyncIdStr = ReadFile( "/sys/kernel/debug/tracing/events/drm/drm_vblank_event/id" ); if( vsyncIdStr ) vsyncId = atoi( vsyncIdStr ); TracyDebug( "sched_switch id: %i\n", switchId ); TracyDebug( "sched_wakeup id: %i\n", wakeupId ); TracyDebug( "drm_vblank_event id: %i\n", vsyncId ); #ifdef TRACY_NO_SAMPLING const bool noSoftwareSampling = true; #else const char* noSoftwareSamplingEnv = GetEnvVar( "TRACY_NO_SAMPLING" ); const bool noSoftwareSampling = noSoftwareSamplingEnv && noSoftwareSamplingEnv[0] == '1'; #endif #ifdef TRACY_NO_SAMPLE_RETIREMENT const bool noRetirement = true; #else const char* noRetirementEnv = GetEnvVar( "TRACY_NO_SAMPLE_RETIREMENT" ); const bool noRetirement = noRetirementEnv && noRetirementEnv[0] == '1'; #endif #ifdef TRACY_NO_SAMPLE_CACHE const bool noCache = true; #else const char* noCacheEnv = GetEnvVar( "TRACY_NO_SAMPLE_CACHE" ); const bool noCache = noCacheEnv && noCacheEnv[0] == '1'; #endif #ifdef TRACY_NO_SAMPLE_BRANCH const bool noBranch = true; #else const char* noBranchEnv = GetEnvVar( "TRACY_NO_SAMPLE_BRANCH" ); const bool noBranch = noBranchEnv && noBranchEnv[0] == '1'; #endif #ifdef TRACY_NO_CONTEXT_SWITCH const bool noCtxSwitch = true; #else const char* noCtxSwitchEnv = GetEnvVar( "TRACY_NO_CONTEXT_SWITCH" ); const bool noCtxSwitch = noCtxSwitchEnv && noCtxSwitchEnv[0] == '1'; #endif #ifdef TRACY_NO_VSYNC_CAPTURE const bool noVsync = true; #else const char* noVsyncEnv = GetEnvVar( "TRACY_NO_VSYNC_CAPTURE" ); const bool noVsync = noVsyncEnv && noVsyncEnv[0] == '1'; #endif samplingPeriod = GetSamplingPeriod(); uint32_t currentPid = (uint32_t)getpid(); s_numCpus = (int)std::thread::hardware_concurrency(); const auto maxNumBuffers = s_numCpus * ( 1 + // software sampling 2 + // CPU cycles + instructions retired 2 + // cache reference + miss 2 + // branch retired + miss 2 + // context switches + wakeups 1 // vsync ); s_ring = (RingBuffer*)tracy_malloc( sizeof( RingBuffer ) * maxNumBuffers ); s_numBuffers = 0; // software sampling perf_event_attr pe = {}; pe.type = PERF_TYPE_SOFTWARE; pe.size = sizeof( perf_event_attr ); pe.config = PERF_COUNT_SW_CPU_CLOCK; pe.sample_freq = GetSamplingFrequency(); pe.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CALLCHAIN; #if LINUX_VERSION_CODE >= KERNEL_VERSION( 4, 8, 0 ) pe.sample_max_stack = 127; #endif pe.disabled = 1; pe.freq = 1; pe.inherit = 1; #if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) pe.use_clockid = 1; pe.clockid = CLOCK_MONOTONIC_RAW; #endif if( !noSoftwareSampling ) { TracyDebug( "Setup software sampling\n" ); ProbePreciseIp( pe, currentPid ); for( int i=0; i= KERNEL_VERSION( 4, 8, 0 ) pe.sample_max_stack = 127; #endif pe.disabled = 1; pe.inherit = 1; pe.config = switchId; #if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) pe.use_clockid = 1; pe.clockid = CLOCK_MONOTONIC_RAW; #endif TracyDebug( "Setup context switch capture\n" ); for( int i=0; i> 63; const auto m2 = test >> 47; if( m1 == m2 ) break; } while( --cnt > 0 ); for( uint64_t j=1; j> 63; const auto m2 = test >> 47; if( m1 != m2 ) trace[j] = 0; } #endif for( uint64_t j=1; j<=cnt; j++ ) { if( trace[j] >= (uint64_t)-4095 ) // PERF_CONTEXT_MAX { memmove( trace+j, trace+j+1, sizeof( uint64_t ) * ( cnt - j ) ); cnt--; } } memcpy( trace, &cnt, sizeof( uint64_t ) ); return trace; } void SysTraceWorker( void* ptr ) { ThreadExitHandler threadExitHandler; SetThreadName( "Tracy Sampling" ); InitRpmalloc(); sched_param sp = { 99 }; if( pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp ) != 0 ) TracyDebug( "Failed to increase SysTraceWorker thread priority!\n" ); auto ctxBufferIdx = s_ctxBufferIdx; auto ringArray = s_ring; auto numBuffers = s_numBuffers; for( int i=0; i tail ); hadData = true; const auto id = ring.GetId(); assert( id != EventContextSwitch ); const auto end = head - tail; uint64_t pos = 0; if( id == EventCallstack ) { while( pos < end ) { perf_event_header hdr; ring.Read( &hdr, pos, sizeof( perf_event_header ) ); if( hdr.type == PERF_RECORD_SAMPLE ) { auto offset = pos + sizeof( perf_event_header ); // Layout: // u32 pid, tid // u64 time // u64 cnt // u64 ip[cnt] uint32_t tid; uint64_t t0; uint64_t cnt; offset += sizeof( uint32_t ); ring.Read( &tid, offset, sizeof( uint32_t ) ); offset += sizeof( uint32_t ); ring.Read( &t0, offset, sizeof( uint64_t ) ); offset += sizeof( uint64_t ); ring.Read( &cnt, offset, sizeof( uint64_t ) ); offset += sizeof( uint64_t ); if( cnt > 0 ) { #if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) t0 = ring.ConvertTimeToTsc( t0 ); #endif auto trace = GetCallstackBlock( cnt, ring, offset ); TracyLfqPrepare( QueueType::CallstackSample ); MemWrite( &item->callstackSampleFat.time, t0 ); MemWrite( &item->callstackSampleFat.thread, tid ); MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace ); TracyLfqCommit; } } pos += hdr.size; } } else { while( pos < end ) { perf_event_header hdr; ring.Read( &hdr, pos, sizeof( perf_event_header ) ); if( hdr.type == PERF_RECORD_SAMPLE ) { auto offset = pos + sizeof( perf_event_header ); // Layout: // u64 ip // u64 time uint64_t ip, t0; ring.Read( &ip, offset, sizeof( uint64_t ) ); offset += sizeof( uint64_t ); ring.Read( &t0, offset, sizeof( uint64_t ) ); #if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) t0 = ring.ConvertTimeToTsc( t0 ); #endif QueueType type; switch( id ) { case EventCpuCycles: type = QueueType::HwSampleCpuCycle; break; case EventInstructionsRetired: type = QueueType::HwSampleInstructionRetired; break; case EventCacheReference: type = QueueType::HwSampleCacheReference; break; case EventCacheMiss: type = QueueType::HwSampleCacheMiss; break; case EventBranchRetired: type = QueueType::HwSampleBranchRetired; break; case EventBranchMiss: type = QueueType::HwSampleBranchMiss; break; default: abort(); } TracyLfqPrepare( type ); MemWrite( &item->hwSample.ip, ip ); MemWrite( &item->hwSample.time, t0 ); TracyLfqCommit; } pos += hdr.size; } } assert( pos == end ); ring.Advance( end ); } if( !traceActive.load( std::memory_order_relaxed ) ) break; if( ctxBufferIdx != numBuffers ) { const auto ctxBufNum = numBuffers - ctxBufferIdx; int activeNum = 0; uint16_t active[512]; uint32_t end[512]; uint32_t pos[512]; for( int i=0; i 0 ) { hadData = true; while( activeNum > 0 ) { int sel = -1; int selPos; int64_t t0 = std::numeric_limits::max(); for( int i=0; i= 0 ) { auto& ring = ringArray[ctxBufferIdx + sel]; auto rbPos = pos[sel]; auto offset = rbPos; perf_event_header hdr; ring.Read( &hdr, offset, sizeof( perf_event_header ) ); #if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) t0 = ring.ConvertTimeToTsc( t0 ); #endif const auto rid = ring.GetId(); if( rid == EventContextSwitch ) { // Layout: // u64 time // u64 cnt // u64 ip[cnt] // u32 size // u8 data[size] // Data (not ABI stable, but has not changed since it was added, in 2009): // u8 hdr[8] // u8 prev_comm[16] // u32 prev_pid // u32 prev_prio // lng prev_state // u8 next_comm[16] // u32 next_pid // u32 next_prio offset += sizeof( perf_event_header ) + sizeof( uint64_t ); uint64_t cnt; ring.Read( &cnt, offset, sizeof( uint64_t ) ); offset += sizeof( uint64_t ); const auto traceOffset = offset; offset += sizeof( uint64_t ) * cnt + sizeof( uint32_t ) + 8 + 16; uint32_t prev_pid, next_pid; long prev_state; ring.Read( &prev_pid, offset, sizeof( uint32_t ) ); offset += sizeof( uint32_t ) + sizeof( uint32_t ); ring.Read( &prev_state, offset, sizeof( long ) ); offset += sizeof( long ) + 16; ring.Read( &next_pid, offset, sizeof( uint32_t ) ); uint8_t reason = 100; uint8_t state; if( prev_state & 0x0001 ) state = 104; else if( prev_state & 0x0002 ) state = 101; else if( prev_state & 0x0004 ) state = 105; else if( prev_state & 0x0008 ) state = 106; else if( prev_state & 0x0010 ) state = 108; else if( prev_state & 0x0020 ) state = 109; else if( prev_state & 0x0040 ) state = 110; else if( prev_state & 0x0080 ) state = 102; else state = 103; TracyLfqPrepare( QueueType::ContextSwitch ); MemWrite( &item->contextSwitch.time, t0 ); MemWrite( &item->contextSwitch.oldThread, prev_pid ); MemWrite( &item->contextSwitch.newThread, next_pid ); MemWrite( &item->contextSwitch.cpu, uint8_t( ring.GetCpu() ) ); MemWrite( &item->contextSwitch.reason, reason ); MemWrite( &item->contextSwitch.state, state ); TracyLfqCommit; if( cnt > 0 && prev_pid != 0 && CurrentProcOwnsThread( prev_pid ) ) { auto trace = GetCallstackBlock( cnt, ring, traceOffset ); TracyLfqPrepare( QueueType::CallstackSampleContextSwitch ); MemWrite( &item->callstackSampleFat.time, t0 ); MemWrite( &item->callstackSampleFat.thread, prev_pid ); MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace ); TracyLfqCommit; } } else if( rid == EventWakeup ) { // Layout: // u64 time // u32 size // u8 data[size] // Data: // u8 hdr[8] // u8 comm[16] // u32 pid // u32 prio // u64 target_cpu offset += sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ) + 8 + 16; uint32_t pid; ring.Read( &pid, offset, sizeof( uint32_t ) ); TracyLfqPrepare( QueueType::ThreadWakeup ); MemWrite( &item->threadWakeup.time, t0 ); MemWrite( &item->threadWakeup.thread, pid ); TracyLfqCommit; } else { assert( rid == EventVsync ); // Layout: // u64 time // u32 size // u8 data[size] // Data (not ABI stable): // u8 hdr[8] // i32 crtc // u32 seq // i64 ktime // u8 high precision offset += sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ) + 8; int32_t crtc; ring.Read( &crtc, offset, sizeof( int32_t ) ); // Note: The timestamp value t0 might be off by a number of microseconds from the // true hardware vblank event. The ktime value should be used instead, but it is // measured in CLOCK_MONOTONIC time. Tracy only supports the timestamp counter // register (TSC) or CLOCK_MONOTONIC_RAW clock. #if 0 offset += sizeof( uint32_t ) * 2; int64_t ktime; ring.Read( &ktime, offset, sizeof( int64_t ) ); #endif TracyLfqPrepare( QueueType::FrameVsync ); MemWrite( &item->frameVsync.id, crtc ); MemWrite( &item->frameVsync.time, t0 ); TracyLfqCommit; } rbPos += hdr.size; if( rbPos == end[sel] ) { memmove( active+selPos, active+selPos+1, sizeof(*active) * ( activeNum - selPos - 1 ) ); activeNum--; } else { pos[sel] = rbPos; } } } for( int i=0; i 0 && buf[sz-1] == '\n' ) buf[sz-1] = '\0'; threadName = CopyString( buf ); fclose( f ); } else { threadName = CopyString( "???", 3 ); } sprintf( fn, "/proc/%" PRIu64 "/status", thread ); f = fopen( fn, "rb" ); if( f ) { char* tmp = (char*)tracy_malloc_fast( 8*1024 ); const auto fsz = (ptrdiff_t)fread( tmp, 1, 8*1024, f ); fclose( f ); int pid = -1; auto line = tmp; for(;;) { if( memcmp( "Tgid:\t", line, 6 ) == 0 ) { pid = atoi( line + 6 ); break; } while( line - tmp < fsz && *line != '\n' ) line++; if( *line != '\n' ) break; line++; } tracy_free_fast( tmp ); if( pid >= 0 ) { { uint64_t _pid = pid; TracyLfqPrepare( QueueType::TidToPid ); MemWrite( &item->tidToPid.tid, thread ); MemWrite( &item->tidToPid.pid, _pid ); TracyLfqCommit; } sprintf( fn, "/proc/%i/comm", pid ); f = fopen( fn, "rb" ); if( f ) { char buf[256]; const auto sz = fread( buf, 1, 256, f ); if( sz > 0 && buf[sz-1] == '\n' ) buf[sz-1] = '\0'; name = CopyStringFast( buf ); fclose( f ); return; } } } name = CopyStringFast( "???", 3 ); } } # endif #endif