#ifdef TRACY_ENABLE #ifdef _WIN32 # ifndef NOMINMAX # define NOMINMAX # endif # include # include # include # include # include # include "../common/TracyUwp.hpp" #else # include # include #endif #ifdef _GNU_SOURCE # include #endif #ifdef __linux__ # include # include # include # include #endif #if defined __APPLE__ || defined BSD # include # include #endif #if defined __APPLE__ # include "TargetConditionals.h" # include #endif #ifdef __ANDROID__ # include # include # include # include # include # include #endif #ifdef __QNX__ # include # include # include # include # include #endif #include #include #include #include #include #include #include #include #include #include #include "../common/TracyAlign.hpp" #include "../common/TracyAlloc.hpp" #include "../common/TracySocket.hpp" #include "../common/TracySystem.hpp" #include "../common/TracyYield.hpp" #include "../common/tracy_lz4.hpp" #include "tracy_rpmalloc.hpp" #include "TracyCallstack.hpp" #include "TracyDebug.hpp" #include "TracyDxt1.hpp" #include "TracyScoped.hpp" #include "TracyProfiler.hpp" #include "TracyThread.hpp" #include "TracyArmCpuTable.hpp" #include "TracySysTrace.hpp" #include "../tracy/TracyC.h" #ifdef TRACY_PORT # ifndef TRACY_DATA_PORT # define TRACY_DATA_PORT TRACY_PORT # endif # ifndef TRACY_BROADCAST_PORT # define TRACY_BROADCAST_PORT TRACY_PORT # endif #endif #ifdef __APPLE__ # ifndef TRACY_DELAYED_INIT # define TRACY_DELAYED_INIT # endif #else # ifdef __GNUC__ # define init_order( val ) __attribute__ ((init_priority(val))) # else # define init_order(x) # endif #endif #if defined _WIN32 # include extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW ); extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD ); extern "C" typedef char* (WINAPI *t_WineGetVersion)(); extern "C" typedef char* (WINAPI *t_WineGetBuildId)(); #else # include # include # include #endif #if defined __linux__ 
# include # include #endif #if !defined _WIN32 && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) # include "TracyCpuid.hpp" #endif #if !( ( defined _WIN32 && _WIN32_WINNT >= _WIN32_WINNT_VISTA ) || defined __linux__ ) # include #endif #ifdef __QNX__ extern char* __progname; #endif namespace tracy { #ifdef __ANDROID__ // Implementation helpers of EnsureReadable(address). // This is so far only needed on Android, where it is common for libraries to be mapped // with only executable, not readable, permissions. Typical example (line from /proc/self/maps): /* 746b63b000-746b6dc000 --xp 00042000 07:48 35 /apex/com.android.runtime/lib64/bionic/libc.so */ // See https://github.com/wolfpld/tracy/issues/125 . // To work around this, we parse /proc/self/maps and we use mprotect to set read permissions // on any mappings that contain symbols addresses hit by HandleSymbolCodeQuery. namespace { // Holds some information about a single memory mapping. struct MappingInfo { // Start of address range. Inclusive. uintptr_t start_address; // End of address range. Exclusive, so the mapping is the half-open interval // [start, end) and its length in bytes is `end - start`. As in /proc/self/maps. uintptr_t end_address; // Read/Write/Executable permissions. bool perm_r, perm_w, perm_x; }; } // anonymous namespace // Internal implementation helper for LookUpMapping(address). // // Parses /proc/self/maps returning a vector. // /proc/self/maps is assumed to be sorted by ascending address, so the resulting // vector is sorted by ascending address too. 
static std::vector ParseMappings() { std::vector result; FILE* file = fopen( "/proc/self/maps", "r" ); if( !file ) return result; char line[1024]; while( fgets( line, sizeof( line ), file ) ) { uintptr_t start_addr; uintptr_t end_addr; #if defined(__LP64__) if( sscanf( line, "%lx-%lx", &start_addr, &end_addr ) != 2 ) continue; #else if (sscanf( line, "%dx-%dx", &start_addr, &end_addr ) != 2 ) continue; #endif char* first_space = strchr( line, ' ' ); if( !first_space ) continue; char* perm = first_space + 1; char* second_space = strchr( perm, ' ' ); if( !second_space || second_space - perm != 4 ) continue; result.emplace_back(); auto& mapping = result.back(); mapping.start_address = start_addr; mapping.end_address = end_addr; mapping.perm_r = perm[0] == 'r'; mapping.perm_w = perm[1] == 'w'; mapping.perm_x = perm[2] == 'x'; } fclose( file ); return result; } // Internal implementation helper for LookUpMapping(address). // // Takes as input an `address` and a known vector `mappings`, assumed to be // sorted by increasing addresses, as /proc/self/maps seems to be. // Returns a pointer to the MappingInfo describing the mapping that this // address belongs to, or nullptr if the address isn't in `mappings`. static MappingInfo* LookUpMapping(std::vector& mappings, uintptr_t address) { // Comparison function for std::lower_bound. Returns true if all addresses in `m1` // are lower than `addr`. auto Compare = []( const MappingInfo& m1, uintptr_t addr ) { // '<=' because the address ranges are half-open intervals, [start, end). return m1.end_address <= addr; }; auto iter = std::lower_bound( mappings.begin(), mappings.end(), address, Compare ); if( iter == mappings.end() || iter->start_address > address) { return nullptr; } return &*iter; } // Internal implementation helper for EnsureReadable(address). 
// // Takes as input an `address` and returns a pointer to a MappingInfo // describing the mapping that this address belongs to, or nullptr if // the address isn't in any known mapping. // // This function is stateful and not reentrant (assumes to be called from // only one thread). It holds a vector of mappings parsed from /proc/self/maps. // // Attempts to react to mappings changes by re-parsing /proc/self/maps. static MappingInfo* LookUpMapping(uintptr_t address) { // Static state managed by this function. Not constant, we mutate that state as // we turn some mappings readable. Initially parsed once here, updated as needed below. static std::vector s_mappings = ParseMappings(); MappingInfo* mapping = LookUpMapping( s_mappings, address ); if( mapping ) return mapping; // This address isn't in any known mapping. Try parsing again, maybe // mappings changed. s_mappings = ParseMappings(); return LookUpMapping( s_mappings, address ); } // Internal implementation helper for EnsureReadable(address). // // Attempts to make the specified `mapping` readable if it isn't already. // Returns true if and only if the mapping is readable. static bool EnsureReadable( MappingInfo& mapping ) { if( mapping.perm_r ) { // The mapping is already readable. return true; } int prot = PROT_READ; if( mapping.perm_w ) prot |= PROT_WRITE; if( mapping.perm_x ) prot |= PROT_EXEC; if( mprotect( reinterpret_cast( mapping.start_address ), mapping.end_address - mapping.start_address, prot ) == -1 ) { // Failed to make the mapping readable. Shouldn't happen, hasn't // been observed yet. If it happened in practice, we should consider // adding a bool to MappingInfo to track this to avoid retrying mprotect // everytime on such mappings. return false; } // The mapping is now readable. Update `mapping` so the next call will be fast. mapping.perm_r = true; return true; } // Attempts to set the read permission on the entire mapping containing the // specified address. 
Returns true if and only if the mapping is now readable. static bool EnsureReadable( uintptr_t address ) { MappingInfo* mapping = LookUpMapping(address); return mapping && EnsureReadable( *mapping ); } #elif defined WIN32 static bool EnsureReadable( uintptr_t address ) { MEMORY_BASIC_INFORMATION memInfo; VirtualQuery( reinterpret_cast( address ), &memInfo, sizeof( memInfo ) ); return memInfo.Protect != PAGE_NOACCESS; } #else static bool EnsureReadable( uintptr_t address ) { return true; } #endif #ifndef TRACY_DELAYED_INIT struct InitTimeWrapper { int64_t val; }; struct ProducerWrapper { tracy::moodycamel::ConcurrentQueue::ExplicitProducer* ptr; }; struct ThreadHandleWrapper { uint32_t val; }; #endif #if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 static inline void CpuId( uint32_t* regs, uint32_t leaf ) { memset(regs, 0, sizeof(uint32_t) * 4); #if defined _MSC_VER __cpuidex( (int*)regs, leaf, 0 ); #else __get_cpuid( leaf, regs, regs+1, regs+2, regs+3 ); #endif } static void InitFailure( const char* msg ) { #if defined _WIN32 bool hasConsole = false; bool reopen = false; const auto attached = AttachConsole( ATTACH_PARENT_PROCESS ); if( attached ) { hasConsole = true; reopen = true; } else { const auto err = GetLastError(); if( err == ERROR_ACCESS_DENIED ) { hasConsole = true; } } if( hasConsole ) { fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg ); if( reopen ) { freopen( "CONOUT$", "w", stderr ); fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg ); } } else { # ifndef TRACY_UWP MessageBoxA( nullptr, msg, "Tracy Profiler initialization failure", MB_ICONSTOP ); # endif } #else fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg ); #endif exit( 1 ); } static bool CheckHardwareSupportsInvariantTSC() { const char* noCheck = GetEnvVar( "TRACY_NO_INVARIANT_CHECK" ); if( noCheck && noCheck[0] == '1' ) return true; uint32_t regs[4]; CpuId( regs, 1 ); if( !( regs[3] & ( 1 << 4 ) ) ) { #if 
!defined TRACY_TIMER_QPC && !defined TRACY_TIMER_FALLBACK InitFailure( "CPU doesn't support RDTSC instruction." ); #else return false; #endif } CpuId( regs, 0x80000007 ); if( regs[3] & ( 1 << 8 ) ) return true; return false; } #if defined TRACY_TIMER_FALLBACK && defined TRACY_HW_TIMER bool HardwareSupportsInvariantTSC() { static bool cachedResult = CheckHardwareSupportsInvariantTSC(); return cachedResult; } #endif static int64_t SetupHwTimer() { #if !defined TRACY_TIMER_QPC && !defined TRACY_TIMER_FALLBACK if( !CheckHardwareSupportsInvariantTSC() ) { #if defined _WIN32 InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_QPC or TRACY_TIMER_FALLBACK define to use lower resolution timer." ); #else InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_FALLBACK define to use lower resolution timer." 
); #endif } #endif return Profiler::GetTime(); } #else static int64_t SetupHwTimer() { return Profiler::GetTime(); } #endif static const char* GetProcessName() { const char* processName = "unknown"; #ifdef _WIN32 static char buf[_MAX_PATH]; GetModuleFileNameA( nullptr, buf, _MAX_PATH ); const char* ptr = buf; while( *ptr != '\0' ) ptr++; while( ptr > buf && *ptr != '\\' && *ptr != '/' ) ptr--; if( ptr > buf ) ptr++; processName = ptr; #elif defined __ANDROID__ # if __ANDROID_API__ >= 21 auto buf = getprogname(); if( buf ) processName = buf; # endif #elif defined __linux__ && defined _GNU_SOURCE if( program_invocation_short_name ) processName = program_invocation_short_name; #elif defined __APPLE__ || defined BSD auto buf = getprogname(); if( buf ) processName = buf; #elif defined __QNX__ processName = __progname; #endif return processName; } static const char* GetProcessExecutablePath() { #ifdef _WIN32 static char buf[_MAX_PATH]; GetModuleFileNameA( nullptr, buf, _MAX_PATH ); return buf; #elif defined __ANDROID__ return nullptr; #elif defined __linux__ && defined _GNU_SOURCE return program_invocation_name; #elif defined __APPLE__ static char buf[1024]; uint32_t size = 1024; _NSGetExecutablePath( buf, &size ); return buf; #elif defined __DragonFly__ static char buf[1024]; readlink( "/proc/curproc/file", buf, 1024 ); return buf; #elif defined __FreeBSD__ static char buf[1024]; int mib[4]; mib[0] = CTL_KERN; mib[1] = KERN_PROC; mib[2] = KERN_PROC_PATHNAME; mib[3] = -1; size_t cb = 1024; sysctl( mib, 4, buf, &cb, nullptr, 0 ); return buf; #elif defined __NetBSD__ static char buf[1024]; readlink( "/proc/curproc/exe", buf, 1024 ); return buf; #elif defined __QNX__ static char buf[_PC_PATH_MAX + 1]; _cmdname(buf); return buf; #else return nullptr; #endif } #if defined __linux__ && defined __ARM_ARCH static uint32_t GetHex( char*& ptr, int skip ) { uint32_t ret; ptr += skip; char* end; if( ptr[0] == '0' && ptr[1] == 'x' ) { ptr += 2; ret = strtol( ptr, &end, 16 ); } else { 
ret = strtol( ptr, &end, 10 ); } ptr = end; return ret; } #endif static const char* GetHostInfo() { static char buf[1024]; auto ptr = buf; #if defined _WIN32 # ifdef TRACY_UWP auto GetVersion = &::GetVersionEx; # else auto GetVersion = (t_RtlGetVersion)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlGetVersion" ); # endif if( !GetVersion ) { # ifdef __MINGW32__ ptr += sprintf( ptr, "OS: Windows (MingW)\n" ); # else ptr += sprintf( ptr, "OS: Windows\n" ); # endif } else { RTL_OSVERSIONINFOW ver = { sizeof( RTL_OSVERSIONINFOW ) }; GetVersion( &ver ); # ifdef __MINGW32__ ptr += sprintf( ptr, "OS: Windows %i.%i.%i (MingW)\n", (int)ver.dwMajorVersion, (int)ver.dwMinorVersion, (int)ver.dwBuildNumber ); # else auto WineGetVersion = (t_WineGetVersion)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "wine_get_version" ); auto WineGetBuildId = (t_WineGetBuildId)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "wine_get_build_id" ); if( WineGetVersion && WineGetBuildId ) { ptr += sprintf( ptr, "OS: Windows %lu.%lu.%lu (Wine %s [%s])\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber, WineGetVersion(), WineGetBuildId() ); } else { ptr += sprintf( ptr, "OS: Windows %lu.%lu.%lu\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber ); } # endif } #elif defined __linux__ struct utsname utsName; uname( &utsName ); # if defined __ANDROID__ ptr += sprintf( ptr, "OS: Linux %s (Android)\n", utsName.release ); # else ptr += sprintf( ptr, "OS: Linux %s\n", utsName.release ); # endif #elif defined __APPLE__ # if TARGET_OS_IPHONE == 1 ptr += sprintf( ptr, "OS: Darwin (iOS)\n" ); # elif TARGET_OS_MAC == 1 ptr += sprintf( ptr, "OS: Darwin (OSX)\n" ); # else ptr += sprintf( ptr, "OS: Darwin (unknown)\n" ); # endif #elif defined __DragonFly__ ptr += sprintf( ptr, "OS: BSD (DragonFly)\n" ); #elif defined __FreeBSD__ ptr += sprintf( ptr, "OS: BSD (FreeBSD)\n" ); #elif defined __NetBSD__ ptr += sprintf( ptr, "OS: BSD (NetBSD)\n" ); #elif defined __OpenBSD__ ptr += sprintf( 
ptr, "OS: BSD (OpenBSD)\n" ); #elif defined __QNX__ ptr += sprintf( ptr, "OS: QNX\n" ); #else ptr += sprintf( ptr, "OS: unknown\n" ); #endif #if defined _MSC_VER # if defined __clang__ ptr += sprintf( ptr, "Compiler: MSVC clang-cl %i.%i.%i\n", __clang_major__, __clang_minor__, __clang_patchlevel__ ); # else ptr += sprintf( ptr, "Compiler: MSVC %i\n", _MSC_VER ); # endif #elif defined __clang__ ptr += sprintf( ptr, "Compiler: clang %i.%i.%i\n", __clang_major__, __clang_minor__, __clang_patchlevel__ ); #elif defined __GNUC__ ptr += sprintf( ptr, "Compiler: gcc %i.%i.%i\n", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ ); #else ptr += sprintf( ptr, "Compiler: unknown\n" ); #endif #if defined _WIN32 InitWinSock(); char hostname[512]; gethostname( hostname, 512 ); # ifdef TRACY_UWP const char* user = ""; # else DWORD userSz = UNLEN+1; char user[UNLEN+1]; GetUserNameA( user, &userSz ); # endif ptr += sprintf( ptr, "User: %s@%s\n", user, hostname ); #else char hostname[_POSIX_HOST_NAME_MAX]{}; char user[_POSIX_LOGIN_NAME_MAX]{}; gethostname( hostname, _POSIX_HOST_NAME_MAX ); # if defined __ANDROID__ const auto login = getlogin(); if( login ) { strcpy( user, login ); } else { memcpy( user, "(?)", 4 ); } # else getlogin_r( user, _POSIX_LOGIN_NAME_MAX ); # endif ptr += sprintf( ptr, "User: %s@%s\n", user, hostname ); #endif #if defined __i386 || defined _M_IX86 ptr += sprintf( ptr, "Arch: x86\n" ); #elif defined __x86_64__ || defined _M_X64 ptr += sprintf( ptr, "Arch: x64\n" ); #elif defined __aarch64__ ptr += sprintf( ptr, "Arch: ARM64\n" ); #elif defined __ARM_ARCH ptr += sprintf( ptr, "Arch: ARM\n" ); #else ptr += sprintf( ptr, "Arch: unknown\n" ); #endif #if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 uint32_t regs[4]; char cpuModel[4*4*3+1] = {}; auto modelPtr = cpuModel; for( uint32_t i=0x80000002; i<0x80000005; ++i ) { CpuId( regs, i ); memcpy( modelPtr, regs, sizeof( regs ) ); modelPtr += sizeof( regs ); } ptr += sprintf( ptr, "CPU: 
%s\n", cpuModel ); #elif defined __linux__ && defined __ARM_ARCH bool cpuFound = false; FILE* fcpuinfo = fopen( "/proc/cpuinfo", "rb" ); if( fcpuinfo ) { enum { BufSize = 4*1024 }; char buf[BufSize]; const auto sz = fread( buf, 1, BufSize, fcpuinfo ); fclose( fcpuinfo ); const auto end = buf + sz; auto cptr = buf; uint32_t impl = 0; uint32_t var = 0; uint32_t part = 0; uint32_t rev = 0; while( end - cptr > 20 ) { while( end - cptr > 20 && memcmp( cptr, "CPU ", 4 ) != 0 ) { cptr += 4; while( end - cptr > 20 && *cptr != '\n' ) cptr++; cptr++; } if( end - cptr <= 20 ) break; cptr += 4; if( memcmp( cptr, "implementer\t: ", 14 ) == 0 ) { if( impl != 0 ) break; impl = GetHex( cptr, 14 ); } else if( memcmp( cptr, "variant\t: ", 10 ) == 0 ) var = GetHex( cptr, 10 ); else if( memcmp( cptr, "part\t: ", 7 ) == 0 ) part = GetHex( cptr, 7 ); else if( memcmp( cptr, "revision\t: ", 11 ) == 0 ) rev = GetHex( cptr, 11 ); while( *cptr != '\n' && *cptr != '\0' ) cptr++; cptr++; } if( impl != 0 || var != 0 || part != 0 || rev != 0 ) { cpuFound = true; ptr += sprintf( ptr, "CPU: %s%s r%ip%i\n", DecodeArmImplementer( impl ), DecodeArmPart( impl, part ), var, rev ); } } if( !cpuFound ) { ptr += sprintf( ptr, "CPU: unknown\n" ); } #elif defined __APPLE__ && TARGET_OS_IPHONE == 1 { size_t sz; sysctlbyname( "hw.machine", nullptr, &sz, nullptr, 0 ); auto str = (char*)tracy_malloc( sz ); sysctlbyname( "hw.machine", str, &sz, nullptr, 0 ); ptr += sprintf( ptr, "Device: %s\n", DecodeIosDevice( str ) ); tracy_free( str ); } #else ptr += sprintf( ptr, "CPU: unknown\n" ); #endif #ifdef __ANDROID__ char deviceModel[PROP_VALUE_MAX+1]; char deviceManufacturer[PROP_VALUE_MAX+1]; __system_property_get( "ro.product.model", deviceModel ); __system_property_get( "ro.product.manufacturer", deviceManufacturer ); ptr += sprintf( ptr, "Device: %s %s\n", deviceManufacturer, deviceModel ); #endif ptr += sprintf( ptr, "CPU cores: %i\n", std::thread::hardware_concurrency() ); #if defined _WIN32 MEMORYSTATUSEX 
statex; statex.dwLength = sizeof( statex ); GlobalMemoryStatusEx( &statex ); # ifdef _MSC_VER ptr += sprintf( ptr, "RAM: %I64u MB\n", statex.ullTotalPhys / 1024 / 1024 ); # else ptr += sprintf( ptr, "RAM: %llu MB\n", statex.ullTotalPhys / 1024 / 1024 ); # endif #elif defined __linux__ struct sysinfo sysInfo; sysinfo( &sysInfo ); ptr += sprintf( ptr, "RAM: %lu MB\n", sysInfo.totalram / 1024 / 1024 ); #elif defined __APPLE__ size_t memSize; size_t sz = sizeof( memSize ); sysctlbyname( "hw.memsize", &memSize, &sz, nullptr, 0 ); ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 ); #elif defined BSD size_t memSize; size_t sz = sizeof( memSize ); sysctlbyname( "hw.physmem", &memSize, &sz, nullptr, 0 ); ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 ); #elif defined __QNX__ struct asinfo_entry *entries = SYSPAGE_ENTRY(asinfo); size_t count = SYSPAGE_ENTRY_SIZE(asinfo) / sizeof(struct asinfo_entry); char *strings = SYSPAGE_ENTRY(strings)->data; uint64_t memSize = 0; size_t i; for (i = 0; i < count; i++) { struct asinfo_entry *entry = &entries[i]; if (strcmp(strings + entry->name, "ram") == 0) { memSize += entry->end - entry->start + 1; } } memSize = memSize / 1024 / 1024; ptr += sprintf( ptr, "RAM: %llu MB\n", memSize); #else ptr += sprintf( ptr, "RAM: unknown\n" ); #endif return buf; } static uint64_t GetPid() { #if defined _WIN32 return uint64_t( GetCurrentProcessId() ); #else return uint64_t( getpid() ); #endif } void Profiler::AckServerQuery() { QueueItem item; MemWrite( &item.hdr.type, QueueType::AckServerQueryNoop ); NeedDataSize( QueueDataSize[(int)QueueType::AckServerQueryNoop] ); AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::AckServerQueryNoop] ); } void Profiler::AckSymbolCodeNotAvailable() { QueueItem item; MemWrite( &item.hdr.type, QueueType::AckSymbolCodeNotAvailable ); NeedDataSize( QueueDataSize[(int)QueueType::AckSymbolCodeNotAvailable] ); AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::AckSymbolCodeNotAvailable] ); } 
static BroadcastMessage& GetBroadcastMessage( const char* procname, size_t pnsz, int& len, int port ) { static BroadcastMessage msg; msg.broadcastVersion = BroadcastVersion; msg.protocolVersion = ProtocolVersion; msg.listenPort = port; msg.pid = GetPid(); memcpy( msg.programName, procname, pnsz ); memset( msg.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz ); len = int( offsetof( BroadcastMessage, programName ) + pnsz + 1 ); return msg; } #if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER static DWORD s_profilerThreadId = 0; static DWORD s_symbolThreadId = 0; static char s_crashText[1024]; LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp ) { if( !GetProfiler().IsConnected() ) return EXCEPTION_CONTINUE_SEARCH; const unsigned ec = pExp->ExceptionRecord->ExceptionCode; auto msgPtr = s_crashText; switch( ec ) { case EXCEPTION_ACCESS_VIOLATION: msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ACCESS_VIOLATION (0x%x). ", ec ); switch( pExp->ExceptionRecord->ExceptionInformation[0] ) { case 0: msgPtr += sprintf( msgPtr, "Read violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] ); break; case 1: msgPtr += sprintf( msgPtr, "Write violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] ); break; case 8: msgPtr += sprintf( msgPtr, "DEP violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] ); break; default: break; } break; case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ARRAY_BOUNDS_EXCEEDED (0x%x). ", ec ); break; case EXCEPTION_DATATYPE_MISALIGNMENT: msgPtr += sprintf( msgPtr, "Exception EXCEPTION_DATATYPE_MISALIGNMENT (0x%x). ", ec ); break; case EXCEPTION_FLT_DIVIDE_BY_ZERO: msgPtr += sprintf( msgPtr, "Exception EXCEPTION_FLT_DIVIDE_BY_ZERO (0x%x). ", ec ); break; case EXCEPTION_ILLEGAL_INSTRUCTION: msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ILLEGAL_INSTRUCTION (0x%x). 
", ec ); break; case EXCEPTION_IN_PAGE_ERROR: msgPtr += sprintf( msgPtr, "Exception EXCEPTION_IN_PAGE_ERROR (0x%x). ", ec ); break; case EXCEPTION_INT_DIVIDE_BY_ZERO: msgPtr += sprintf( msgPtr, "Exception EXCEPTION_INT_DIVIDE_BY_ZERO (0x%x). ", ec ); break; case EXCEPTION_PRIV_INSTRUCTION: msgPtr += sprintf( msgPtr, "Exception EXCEPTION_PRIV_INSTRUCTION (0x%x). ", ec ); break; case EXCEPTION_STACK_OVERFLOW: msgPtr += sprintf( msgPtr, "Exception EXCEPTION_STACK_OVERFLOW (0x%x). ", ec ); break; default: return EXCEPTION_CONTINUE_SEARCH; } { GetProfiler().SendCallstack( 60, "KiUserExceptionDispatcher" ); TracyQueuePrepare( QueueType::CrashReport ); item->crashReport.time = Profiler::GetTime(); item->crashReport.text = (uint64_t)s_crashText; TracyQueueCommit( crashReportThread ); } HANDLE h = CreateToolhelp32Snapshot( TH32CS_SNAPTHREAD, 0 ); if( h == INVALID_HANDLE_VALUE ) return EXCEPTION_CONTINUE_SEARCH; THREADENTRY32 te = { sizeof( te ) }; if( !Thread32First( h, &te ) ) { CloseHandle( h ); return EXCEPTION_CONTINUE_SEARCH; } const auto pid = GetCurrentProcessId(); const auto tid = GetCurrentThreadId(); do { if( te.th32OwnerProcessID == pid && te.th32ThreadID != tid && te.th32ThreadID != s_profilerThreadId && te.th32ThreadID != s_symbolThreadId ) { HANDLE th = OpenThread( THREAD_SUSPEND_RESUME, FALSE, te.th32ThreadID ); if( th != INVALID_HANDLE_VALUE ) { SuspendThread( th ); CloseHandle( th ); } } } while( Thread32Next( h, &te ) ); CloseHandle( h ); { TracyLfqPrepare( QueueType::Crash ); TracyLfqCommit; } std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) ); GetProfiler().RequestShutdown(); while( !GetProfiler().HasShutdownFinished() ) { std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); }; return EXCEPTION_CONTINUE_SEARCH; } #endif static Profiler* s_instance = nullptr; static Thread* s_thread; #ifndef TRACY_NO_FRAME_IMAGE static Thread* s_compressThread; #endif #ifdef TRACY_HAS_CALLSTACK static Thread* s_symbolThread; std::atomic 
s_symbolThreadGone { false }; #endif #ifdef TRACY_HAS_SYSTEM_TRACING static Thread* s_sysTraceThread = nullptr; #endif #if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER # ifndef TRACY_CRASH_SIGNAL # define TRACY_CRASH_SIGNAL SIGPWR # endif static long s_profilerTid = 0; static long s_symbolTid = 0; static char s_crashText[1024]; static std::atomic s_alreadyCrashed( false ); static void ThreadFreezer( int /*signal*/ ) { for(;;) sleep( 1000 ); } static inline void HexPrint( char*& ptr, uint64_t val ) { if( val == 0 ) { *ptr++ = '0'; return; } static const char HexTable[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; char buf[16]; auto bptr = buf; do { *bptr++ = HexTable[val%16]; val /= 16; } while( val > 0 ); do { *ptr++ = *--bptr; } while( bptr != buf ); } static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ ) { bool expected = false; if( !s_alreadyCrashed.compare_exchange_strong( expected, true ) ) ThreadFreezer( signal ); struct sigaction act = {}; act.sa_handler = SIG_DFL; sigaction( SIGABRT, &act, nullptr ); auto msgPtr = s_crashText; switch( signal ) { case SIGILL: strcpy( msgPtr, "Illegal Instruction.\n" ); while( *msgPtr ) msgPtr++; switch( info->si_code ) { case ILL_ILLOPC: strcpy( msgPtr, "Illegal opcode.\n" ); break; case ILL_ILLOPN: strcpy( msgPtr, "Illegal operand.\n" ); break; case ILL_ILLADR: strcpy( msgPtr, "Illegal addressing mode.\n" ); break; case ILL_ILLTRP: strcpy( msgPtr, "Illegal trap.\n" ); break; case ILL_PRVOPC: strcpy( msgPtr, "Privileged opcode.\n" ); break; case ILL_PRVREG: strcpy( msgPtr, "Privileged register.\n" ); break; case ILL_COPROC: strcpy( msgPtr, "Coprocessor error.\n" ); break; case ILL_BADSTK: strcpy( msgPtr, "Internal stack error.\n" ); break; default: break; } break; case SIGFPE: strcpy( msgPtr, "Floating-point exception.\n" ); while( *msgPtr ) msgPtr++; switch( info->si_code ) { case FPE_INTDIV: strcpy( msgPtr, "Integer divide by zero.\n" ); break; case 
FPE_INTOVF: strcpy( msgPtr, "Integer overflow.\n" ); break; case FPE_FLTDIV: strcpy( msgPtr, "Floating-point divide by zero.\n" ); break; case FPE_FLTOVF: strcpy( msgPtr, "Floating-point overflow.\n" ); break; case FPE_FLTUND: strcpy( msgPtr, "Floating-point underflow.\n" ); break; case FPE_FLTRES: strcpy( msgPtr, "Floating-point inexact result.\n" ); break; case FPE_FLTINV: strcpy( msgPtr, "Floating-point invalid operation.\n" ); break; case FPE_FLTSUB: strcpy( msgPtr, "Subscript out of range.\n" ); break; default: break; } break; case SIGSEGV: strcpy( msgPtr, "Invalid memory reference.\n" ); while( *msgPtr ) msgPtr++; switch( info->si_code ) { case SEGV_MAPERR: strcpy( msgPtr, "Address not mapped to object.\n" ); break; case SEGV_ACCERR: strcpy( msgPtr, "Invalid permissions for mapped object.\n" ); break; # ifdef SEGV_BNDERR case SEGV_BNDERR: strcpy( msgPtr, "Failed address bound checks.\n" ); break; # endif # ifdef SEGV_PKUERR case SEGV_PKUERR: strcpy( msgPtr, "Access was denied by memory protection keys.\n" ); break; # endif default: break; } break; case SIGPIPE: strcpy( msgPtr, "Broken pipe.\n" ); while( *msgPtr ) msgPtr++; break; case SIGBUS: strcpy( msgPtr, "Bus error.\n" ); while( *msgPtr ) msgPtr++; switch( info->si_code ) { case BUS_ADRALN: strcpy( msgPtr, "Invalid address alignment.\n" ); break; case BUS_ADRERR: strcpy( msgPtr, "Nonexistent physical address.\n" ); break; case BUS_OBJERR: strcpy( msgPtr, "Object-specific hardware error.\n" ); break; # ifdef BUS_MCEERR_AR case BUS_MCEERR_AR: strcpy( msgPtr, "Hardware memory error consumed on a machine check; action required.\n" ); break; # endif # ifdef BUS_MCEERR_AO case BUS_MCEERR_AO: strcpy( msgPtr, "Hardware memory error detected in process but not consumed; action optional.\n" ); break; # endif default: break; } break; case SIGABRT: strcpy( msgPtr, "Abort signal from abort().\n" ); break; default: abort(); } while( *msgPtr ) msgPtr++; if( signal != SIGPIPE ) { strcpy( msgPtr, "Fault address: 0x" ); 
while( *msgPtr ) msgPtr++; HexPrint( msgPtr, uint64_t( info->si_addr ) ); *msgPtr++ = '\n'; } { GetProfiler().SendCallstack( 60, "__kernel_rt_sigreturn" ); TracyQueuePrepare( QueueType::CrashReport ); item->crashReport.time = Profiler::GetTime(); item->crashReport.text = (uint64_t)s_crashText; TracyQueueCommit( crashReportThread ); } DIR* dp = opendir( "/proc/self/task" ); if( !dp ) abort(); const auto selfTid = syscall( SYS_gettid ); struct dirent* ep; while( ( ep = readdir( dp ) ) != nullptr ) { if( ep->d_name[0] == '.' ) continue; int tid = atoi( ep->d_name ); if( tid != selfTid && tid != s_profilerTid && tid != s_symbolTid ) { syscall( SYS_tkill, tid, TRACY_CRASH_SIGNAL ); } } closedir( dp ); #ifdef TRACY_HAS_CALLSTACK if( selfTid == s_symbolTid ) s_symbolThreadGone.store( true, std::memory_order_release ); #endif TracyLfqPrepare( QueueType::Crash ); TracyLfqCommit; std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) ); GetProfiler().RequestShutdown(); while( !GetProfiler().HasShutdownFinished() ) { std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); }; abort(); } #endif enum { QueuePrealloc = 256 * 1024 }; TRACY_API int64_t GetFrequencyQpc() { #if defined _WIN32 LARGE_INTEGER t; QueryPerformanceFrequency( &t ); return t.QuadPart; #else return 0; #endif } #ifdef TRACY_DELAYED_INIT struct ThreadNameData; TRACY_API moodycamel::ConcurrentQueue& GetQueue(); struct ProfilerData { int64_t initTime = SetupHwTimer(); moodycamel::ConcurrentQueue queue; Profiler profiler; std::atomic lockCounter { 0 }; std::atomic gpuCtxCounter { 0 }; std::atomic threadNameData { nullptr }; }; struct ProducerWrapper { ProducerWrapper( ProfilerData& data ) : detail( data.queue ), ptr( data.queue.get_explicit_producer( detail ) ) {} moodycamel::ProducerToken detail; tracy::moodycamel::ConcurrentQueue::ExplicitProducer* ptr; }; struct ProfilerThreadData { ProfilerThreadData( ProfilerData& data ) : token( data ), gpuCtx( { nullptr } ) {} ProducerWrapper token; 
GpuCtxWrapper gpuCtx; # ifdef TRACY_ON_DEMAND LuaZoneState luaZoneState; # endif }; std::atomic RpInitDone { 0 }; std::atomic RpInitLock { 0 }; thread_local bool RpThreadInitDone = false; thread_local bool RpThreadShutdown = false; # ifdef TRACY_MANUAL_LIFETIME ProfilerData* s_profilerData = nullptr; static ProfilerThreadData& GetProfilerThreadData(); static std::atomic s_isProfilerStarted { false }; TRACY_API void StartupProfiler() { s_profilerData = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) ); new (s_profilerData) ProfilerData(); s_profilerData->profiler.SpawnWorkerThreads(); GetProfilerThreadData().token = ProducerWrapper( *s_profilerData ); s_isProfilerStarted.store( true, std::memory_order_seq_cst ); } static ProfilerData& GetProfilerData() { assert( s_profilerData ); return *s_profilerData; } TRACY_API void ShutdownProfiler() { s_isProfilerStarted.store( false, std::memory_order_seq_cst ); s_profilerData->~ProfilerData(); tracy_free( s_profilerData ); s_profilerData = nullptr; rpmalloc_finalize(); RpThreadInitDone = false; RpInitDone.store( 0, std::memory_order_release ); } TRACY_API bool IsProfilerStarted() { return s_isProfilerStarted.load( std::memory_order_seq_cst ); } # else static std::atomic profilerDataLock { 0 }; static std::atomic profilerData { nullptr }; static ProfilerData& GetProfilerData() { auto ptr = profilerData.load( std::memory_order_acquire ); if( !ptr ) { int expected = 0; while( !profilerDataLock.compare_exchange_weak( expected, 1, std::memory_order_release, std::memory_order_relaxed ) ) { expected = 0; YieldThread(); } ptr = profilerData.load( std::memory_order_acquire ); if( !ptr ) { ptr = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) ); new (ptr) ProfilerData(); profilerData.store( ptr, std::memory_order_release ); } profilerDataLock.store( 0, std::memory_order_release ); } return *ptr; } # endif // GCC prior to 8.4 had a bug with function-inline thread_local variables. 
Versions of glibc beginning with // 2.18 may attempt to work around this issue, which manifests as a crash while running static destructors // if this function is compiled into a shared object. Unfortunately, centos7 ships with glibc 2.17. If running // on old GCC, use the old-fashioned way as a workaround // See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85400 #if !defined(__clang__) && defined(__GNUC__) && ((__GNUC__ < 8) || ((__GNUC__ == 8) && (__GNUC_MINOR__ < 4))) struct ProfilerThreadDataKey { public: ProfilerThreadDataKey() { int val = pthread_key_create(&m_key, sDestructor); static_cast(val); // unused assert(val == 0); } ~ProfilerThreadDataKey() { int val = pthread_key_delete(m_key); static_cast(val); // unused assert(val == 0); } ProfilerThreadData& get() { void* p = pthread_getspecific(m_key); if (!p) { p = (ProfilerThreadData*)tracy_malloc( sizeof( ProfilerThreadData ) ); new (p) ProfilerThreadData(GetProfilerData()); pthread_setspecific(m_key, p); } return *static_cast(p); } private: pthread_key_t m_key; static void sDestructor(void* p) { ((ProfilerThreadData*)p)->~ProfilerThreadData(); tracy_free(p); } }; static ProfilerThreadData& GetProfilerThreadData() { static ProfilerThreadDataKey key; return key.get(); } #else static ProfilerThreadData& GetProfilerThreadData() { thread_local ProfilerThreadData data( GetProfilerData() ); return data; } #endif TRACY_API moodycamel::ConcurrentQueue::ExplicitProducer* GetToken() { return GetProfilerThreadData().token.ptr; } TRACY_API Profiler& GetProfiler() { return GetProfilerData().profiler; } TRACY_API moodycamel::ConcurrentQueue& GetQueue() { return GetProfilerData().queue; } TRACY_API int64_t GetInitTime() { return GetProfilerData().initTime; } TRACY_API std::atomic& GetLockCounter() { return GetProfilerData().lockCounter; } TRACY_API std::atomic& GetGpuCtxCounter() { return GetProfilerData().gpuCtxCounter; } TRACY_API GpuCtxWrapper& GetGpuCtx() { return GetProfilerThreadData().gpuCtx; } TRACY_API uint32_t 
GetThreadHandle() { return detail::GetThreadHandleImpl(); } std::atomic& GetThreadNameData() { return GetProfilerData().threadNameData; } # ifdef TRACY_ON_DEMAND TRACY_API LuaZoneState& GetLuaZoneState() { return GetProfilerThreadData().luaZoneState; } # endif # ifndef TRACY_MANUAL_LIFETIME namespace { const auto& __profiler_init = GetProfiler(); } # endif #else // MSVC static initialization order solution. gcc/clang uses init_order() to avoid all this. // 1a. But s_queue is needed for initialization of variables in point 2. extern moodycamel::ConcurrentQueue s_queue; // 2. If these variables would be in the .CRT$XCB section, they would be initialized only in main thread. thread_local moodycamel::ProducerToken init_order(107) s_token_detail( s_queue ); thread_local ProducerWrapper init_order(108) s_token { s_queue.get_explicit_producer( s_token_detail ) }; thread_local ThreadHandleWrapper init_order(104) s_threadHandle { detail::GetThreadHandleImpl() }; # ifdef _MSC_VER // 1. Initialize these static variables before all other variables. 
# pragma warning( disable : 4075 ) # pragma init_seg( ".CRT$XCB" ) # endif static InitTimeWrapper init_order(101) s_initTime { SetupHwTimer() }; std::atomic init_order(102) RpInitDone( 0 ); std::atomic init_order(102) RpInitLock( 0 ); thread_local bool RpThreadInitDone = false; thread_local bool RpThreadShutdown = false; moodycamel::ConcurrentQueue init_order(103) s_queue( QueuePrealloc ); std::atomic init_order(104) s_lockCounter( 0 ); std::atomic init_order(104) s_gpuCtxCounter( 0 ); thread_local GpuCtxWrapper init_order(104) s_gpuCtx { nullptr }; struct ThreadNameData; static std::atomic init_order(104) s_threadNameDataInstance( nullptr ); std::atomic& s_threadNameData = s_threadNameDataInstance; # ifdef TRACY_ON_DEMAND thread_local LuaZoneState init_order(104) s_luaZoneState { 0, false }; # endif static Profiler init_order(105) s_profiler; TRACY_API moodycamel::ConcurrentQueue::ExplicitProducer* GetToken() { return s_token.ptr; } TRACY_API Profiler& GetProfiler() { return s_profiler; } TRACY_API moodycamel::ConcurrentQueue& GetQueue() { return s_queue; } TRACY_API int64_t GetInitTime() { return s_initTime.val; } TRACY_API std::atomic& GetLockCounter() { return s_lockCounter; } TRACY_API std::atomic& GetGpuCtxCounter() { return s_gpuCtxCounter; } TRACY_API GpuCtxWrapper& GetGpuCtx() { return s_gpuCtx; } TRACY_API uint32_t GetThreadHandle() { return s_threadHandle.val; } std::atomic& GetThreadNameData() { return s_threadNameData; } # ifdef TRACY_ON_DEMAND TRACY_API LuaZoneState& GetLuaZoneState() { return s_luaZoneState; } # endif #endif TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; } TRACY_API bool ProfilerAllocatorAvailable() { return !RpThreadShutdown; } constexpr static size_t SafeSendBufferSize = 65536; Profiler::Profiler() : m_timeBegin( 0 ) , m_mainThread( detail::GetThreadHandleImpl() ) , m_epoch( std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch() ).count() ) , m_shutdown( false ) , m_shutdownManual( false 
) , m_shutdownFinished( false ) , m_sock( nullptr ) , m_broadcast( nullptr ) , m_noExit( false ) , m_userPort( 0 ) , m_zoneId( 1 ) , m_samplingPeriod( 0 ) , m_stream( LZ4_createStream() ) , m_buffer( (char*)tracy_malloc( TargetFrameSize*3 ) ) , m_bufferOffset( 0 ) , m_bufferStart( 0 ) , m_lz4Buf( (char*)tracy_malloc( LZ4Size + sizeof( lz4sz_t ) ) ) , m_serialQueue( 1024*1024 ) , m_serialDequeue( 1024*1024 ) #ifndef TRACY_NO_FRAME_IMAGE , m_fiQueue( 16 ) , m_fiDequeue( 16 ) #endif , m_symbolQueue( 8*1024 ) , m_frameCount( 0 ) , m_isConnected( false ) #ifdef TRACY_ON_DEMAND , m_connectionId( 0 ) , m_deferredQueue( 64*1024 ) #endif , m_paramCallback( nullptr ) , m_sourceCallback( nullptr ) , m_queryImage( nullptr ) , m_queryData( nullptr ) , m_crashHandlerInstalled( false ) , m_programName( nullptr ) { assert( !s_instance ); s_instance = this; #ifndef TRACY_DELAYED_INIT # ifdef _MSC_VER // 3. But these variables need to be initialized in main thread within the .CRT$XCB section. Do it here. 
s_token_detail = moodycamel::ProducerToken( s_queue ); s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) }; s_threadHandle = ThreadHandleWrapper { m_mainThread }; # endif #endif CalibrateTimer(); CalibrateDelay(); ReportTopology(); #ifdef __linux__ m_kcore = (KCore*)tracy_malloc( sizeof( KCore ) ); new(m_kcore) KCore(); #endif #ifndef TRACY_NO_EXIT const char* noExitEnv = GetEnvVar( "TRACY_NO_EXIT" ); if( noExitEnv && noExitEnv[0] == '1' ) { m_noExit = true; } #endif const char* userPort = GetEnvVar( "TRACY_PORT" ); if( userPort ) { m_userPort = atoi( userPort ); } m_safeSendBuffer = (char*)tracy_malloc( SafeSendBufferSize ); #ifndef _WIN32 pipe(m_pipe); # if defined __APPLE__ || defined BSD // FreeBSD/XNU don't have F_SETPIPE_SZ, so use the default m_pipeBufSize = 16384; # else m_pipeBufSize = (int)(ptrdiff_t)SafeSendBufferSize; while( fcntl( m_pipe[0], F_SETPIPE_SZ, m_pipeBufSize ) < 0 && errno == EPERM ) m_pipeBufSize /= 2; // too big; reduce m_pipeBufSize = fcntl( m_pipe[0], F_GETPIPE_SZ ); # endif fcntl( m_pipe[1], F_SETFL, O_NONBLOCK ); #endif #if !defined(TRACY_DELAYED_INIT) || !defined(TRACY_MANUAL_LIFETIME) SpawnWorkerThreads(); #endif } void Profiler::InstallCrashHandler() { #if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER struct sigaction threadFreezer = {}; threadFreezer.sa_handler = ThreadFreezer; sigaction( TRACY_CRASH_SIGNAL, &threadFreezer, &m_prevSignal.pwr ); struct sigaction crashHandler = {}; crashHandler.sa_sigaction = CrashHandler; crashHandler.sa_flags = SA_SIGINFO; sigaction( SIGILL, &crashHandler, &m_prevSignal.ill ); sigaction( SIGFPE, &crashHandler, &m_prevSignal.fpe ); sigaction( SIGSEGV, &crashHandler, &m_prevSignal.segv ); sigaction( SIGPIPE, &crashHandler, &m_prevSignal.pipe ); sigaction( SIGBUS, &crashHandler, &m_prevSignal.bus ); sigaction( SIGABRT, &crashHandler, &m_prevSignal.abrt ); #endif #if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER // We cannot use Vectored 
Exception handling because it catches application-wide frame-based SEH blocks. We only // want to catch unhandled exceptions. m_prevHandler = SetUnhandledExceptionFilter( CrashFilter ); #endif #ifndef TRACY_NO_CRASH_HANDLER m_crashHandlerInstalled = true; #endif } void Profiler::RemoveCrashHandler() { #if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER if( m_crashHandlerInstalled ) { auto prev = SetUnhandledExceptionFilter( (LPTOP_LEVEL_EXCEPTION_FILTER)m_prevHandler ); if( prev != CrashFilter ) SetUnhandledExceptionFilter( prev ); // A different exception filter was installed over ours => put it back } #endif #if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER if( m_crashHandlerInstalled ) { auto restore = []( int signum, struct sigaction* prev ) { struct sigaction old; sigaction( signum, prev, &old ); if( old.sa_sigaction != CrashHandler ) sigaction( signum, &old, nullptr ); // A different signal handler was installed over ours => put it back }; restore( TRACY_CRASH_SIGNAL, &m_prevSignal.pwr ); restore( SIGILL, &m_prevSignal.ill ); restore( SIGFPE, &m_prevSignal.fpe ); restore( SIGSEGV, &m_prevSignal.segv ); restore( SIGPIPE, &m_prevSignal.pipe ); restore( SIGBUS, &m_prevSignal.bus ); restore( SIGABRT, &m_prevSignal.abrt ); } #endif m_crashHandlerInstalled = false; } void Profiler::SpawnWorkerThreads() { #ifdef TRACY_HAS_SYSTEM_TRACING // use TRACY_NO_SYS_TRACE=1 to force disabling sys tracing (even if available in the underlying system) // as it can have significant impact on the size of the traces const char* noSysTrace = GetEnvVar( "TRACY_NO_SYS_TRACE" ); const bool disableSystrace = (noSysTrace && noSysTrace[0] == '1'); if( disableSystrace ) { TracyDebug("TRACY: Sys Trace was disabled by 'TRACY_NO_SYS_TRACE=1'\n"); } else if( SysTraceStart( m_samplingPeriod ) ) { s_sysTraceThread = (Thread*)tracy_malloc( sizeof( Thread ) ); new(s_sysTraceThread) Thread( SysTraceWorker, nullptr ); std::this_thread::sleep_for( 
std::chrono::milliseconds( 1 ) ); } #endif s_thread = (Thread*)tracy_malloc( sizeof( Thread ) ); new(s_thread) Thread( LaunchWorker, this ); #ifndef TRACY_NO_FRAME_IMAGE s_compressThread = (Thread*)tracy_malloc( sizeof( Thread ) ); new(s_compressThread) Thread( LaunchCompressWorker, this ); #endif #ifdef TRACY_HAS_CALLSTACK s_symbolThread = (Thread*)tracy_malloc( sizeof( Thread ) ); new(s_symbolThread) Thread( LaunchSymbolWorker, this ); #endif #if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER s_profilerThreadId = GetThreadId( s_thread->Handle() ); # ifdef TRACY_HAS_CALLSTACK s_symbolThreadId = GetThreadId( s_symbolThread->Handle() ); # endif #endif #ifdef TRACY_HAS_CALLSTACK InitCallstackCritical(); #endif m_timeBegin.store( GetTime(), std::memory_order_relaxed ); } Profiler::~Profiler() { m_shutdown.store( true, std::memory_order_relaxed ); RemoveCrashHandler(); #ifdef TRACY_HAS_SYSTEM_TRACING if( s_sysTraceThread ) { SysTraceStop(); s_sysTraceThread->~Thread(); tracy_free( s_sysTraceThread ); } #endif #ifdef TRACY_HAS_CALLSTACK s_symbolThread->~Thread(); tracy_free( s_symbolThread ); #endif #ifndef TRACY_NO_FRAME_IMAGE s_compressThread->~Thread(); tracy_free( s_compressThread ); #endif s_thread->~Thread(); tracy_free( s_thread ); #ifdef TRACY_HAS_CALLSTACK EndCallstack(); #endif #ifdef __linux__ m_kcore->~KCore(); tracy_free( m_kcore ); #endif #ifndef _WIN32 close( m_pipe[0] ); close( m_pipe[1] ); #endif tracy_free( m_safeSendBuffer ); tracy_free( m_lz4Buf ); tracy_free( m_buffer ); LZ4_freeStream( (LZ4_stream_t*)m_stream ); if( m_sock ) { m_sock->~Socket(); tracy_free( m_sock ); } if( m_broadcast ) { m_broadcast->~UdpBroadcast(); tracy_free( m_broadcast ); } assert( s_instance ); s_instance = nullptr; } bool Profiler::ShouldExit() { return s_instance->m_shutdown.load( std::memory_order_relaxed ); } void Profiler::Worker() { #if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER s_profilerTid = syscall( SYS_gettid ); #endif 
ThreadExitHandler threadExitHandler; SetThreadName( "Tracy Profiler" ); #ifdef TRACY_DATA_PORT const bool dataPortSearch = false; auto dataPort = m_userPort != 0 ? m_userPort : TRACY_DATA_PORT; #else const bool dataPortSearch = m_userPort == 0; auto dataPort = m_userPort != 0 ? m_userPort : 8086; #endif #ifdef TRACY_BROADCAST_PORT const auto broadcastPort = TRACY_BROADCAST_PORT; #else const auto broadcastPort = 8086; #endif while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); #ifdef TRACY_USE_RPMALLOC rpmalloc_thread_initialize(); #endif m_exectime = 0; const auto execname = GetProcessExecutablePath(); if( execname ) { struct stat st; if( stat( execname, &st ) == 0 ) { m_exectime = (uint64_t)st.st_mtime; } } const auto procname = GetProcessName(); const auto pnsz = std::min( strlen( procname ), WelcomeMessageProgramNameSize - 1 ); const auto hostinfo = GetHostInfo(); const auto hisz = std::min( strlen( hostinfo ), WelcomeMessageHostInfoSize - 1 ); const uint64_t pid = GetPid(); uint8_t flags = 0; #ifdef TRACY_ON_DEMAND flags |= WelcomeFlag::OnDemand; #endif #ifdef __APPLE__ flags |= WelcomeFlag::IsApple; #endif #ifndef TRACY_NO_CODE_TRANSFER flags |= WelcomeFlag::CodeTransfer; #endif #ifdef _WIN32 flags |= WelcomeFlag::CombineSamples; # ifndef TRACY_NO_CONTEXT_SWITCH flags |= WelcomeFlag::IdentifySamples; # endif #endif #if defined __i386 || defined _M_IX86 uint8_t cpuArch = CpuArchX86; #elif defined __x86_64__ || defined _M_X64 uint8_t cpuArch = CpuArchX64; #elif defined __aarch64__ uint8_t cpuArch = CpuArchArm64; #elif defined __ARM_ARCH uint8_t cpuArch = CpuArchArm32; #else uint8_t cpuArch = CpuArchUnknown; #endif #if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 uint32_t regs[4]; char manufacturer[12]; CpuId( regs, 0 ); memcpy( manufacturer, regs+1, 4 ); memcpy( manufacturer+4, regs+3, 4 ); memcpy( manufacturer+8, regs+2, 4 ); CpuId( regs, 1 ); uint32_t cpuId = ( 
regs[0] & 0xFFF ) | ( ( regs[0] & 0xFFF0000 ) >> 4 ); #else const char manufacturer[12] = {}; uint32_t cpuId = 0; #endif WelcomeMessage welcome; MemWrite( &welcome.timerMul, m_timerMul ); MemWrite( &welcome.initBegin, GetInitTime() ); MemWrite( &welcome.initEnd, m_timeBegin.load( std::memory_order_relaxed ) ); MemWrite( &welcome.delay, m_delay ); MemWrite( &welcome.resolution, m_resolution ); MemWrite( &welcome.epoch, m_epoch ); MemWrite( &welcome.exectime, m_exectime ); MemWrite( &welcome.pid, pid ); MemWrite( &welcome.samplingPeriod, m_samplingPeriod ); MemWrite( &welcome.flags, flags ); MemWrite( &welcome.cpuArch, cpuArch ); memcpy( welcome.cpuManufacturer, manufacturer, 12 ); MemWrite( &welcome.cpuId, cpuId ); memcpy( welcome.programName, procname, pnsz ); memset( welcome.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz ); memcpy( welcome.hostInfo, hostinfo, hisz ); memset( welcome.hostInfo + hisz, 0, WelcomeMessageHostInfoSize - hisz ); moodycamel::ConsumerToken token( GetQueue() ); ListenSocket listen; bool isListening = false; if( !dataPortSearch ) { isListening = listen.Listen( dataPort, 4 ); } else { for( uint32_t i=0; i<20; i++ ) { if( listen.Listen( dataPort+i, 4 ) ) { dataPort += i; isListening = true; break; } } } if( !isListening ) { for(;;) { if( ShouldExit() ) { m_shutdownFinished.store( true, std::memory_order_relaxed ); return; } ClearQueues( token ); std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); } } #ifndef TRACY_NO_BROADCAST m_broadcast = (UdpBroadcast*)tracy_malloc( sizeof( UdpBroadcast ) ); new(m_broadcast) UdpBroadcast(); # ifdef TRACY_ONLY_LOCALHOST const char* addr = "127.255.255.255"; # elif defined TRACY_CLIENT_ADDRESS const char* addr = TRACY_CLIENT_ADDRESS; # elif defined __QNX__ // global broadcast address of 255.255.255.255 is not well-supported by QNX, // use the interface broadcast address instead, e.g. "const char* addr = 192.168.1.255;" # error Need to specify TRACY_CLIENT_ADDRESS for a QNX target. 
# else const char* addr = "255.255.255.255"; # endif if( !m_broadcast->Open( addr, broadcastPort ) ) { m_broadcast->~UdpBroadcast(); tracy_free( m_broadcast ); m_broadcast = nullptr; } #endif int broadcastLen = 0; auto& broadcastMsg = GetBroadcastMessage( procname, pnsz, broadcastLen, dataPort ); uint64_t lastBroadcast = 0; // Connections loop. // Each iteration of the loop handles whole connection. Multiple iterations will only // happen in the on-demand mode or when handshake fails. for(;;) { // Wait for incoming connection for(;;) { #ifndef TRACY_NO_EXIT if( !m_noExit && ShouldExit() ) { if( m_broadcast ) { broadcastMsg.activeTime = -1; m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); } m_shutdownFinished.store( true, std::memory_order_relaxed ); return; } #endif m_sock = listen.Accept(); if( m_sock ) break; #ifndef TRACY_ON_DEMAND ProcessSysTime(); # ifdef TRACY_HAS_SYSPOWER m_sysPower.Tick(); # endif #endif if( m_broadcast ) { const auto t = std::chrono::high_resolution_clock::now().time_since_epoch().count(); if( t - lastBroadcast > 3000000000 ) // 3s { m_programNameLock.lock(); if( m_programName ) { broadcastMsg = GetBroadcastMessage( m_programName, strlen( m_programName ), broadcastLen, dataPort ); m_programName = nullptr; } m_programNameLock.unlock(); lastBroadcast = t; const auto ts = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch() ).count(); broadcastMsg.activeTime = int32_t( ts - m_epoch ); assert( broadcastMsg.activeTime >= 0 ); m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); } } } if( m_broadcast ) { lastBroadcast = 0; broadcastMsg.activeTime = -1; m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); } // Handshake { char shibboleth[HandshakeShibbolethSize]; auto res = m_sock->ReadRaw( shibboleth, HandshakeShibbolethSize, 2000 ); if( !res || memcmp( shibboleth, HandshakeShibboleth, HandshakeShibbolethSize ) != 0 ) { m_sock->~Socket(); tracy_free( m_sock ); m_sock = nullptr; 
continue; } uint32_t protocolVersion; res = m_sock->ReadRaw( &protocolVersion, sizeof( protocolVersion ), 2000 ); if( !res ) { m_sock->~Socket(); tracy_free( m_sock ); m_sock = nullptr; continue; } if( protocolVersion != ProtocolVersion ) { HandshakeStatus status = HandshakeProtocolMismatch; m_sock->Send( &status, sizeof( status ) ); m_sock->~Socket(); tracy_free( m_sock ); m_sock = nullptr; continue; } } #ifdef TRACY_ON_DEMAND const auto currentTime = GetTime(); ClearQueues( token ); m_connectionId.fetch_add( 1, std::memory_order_release ); #endif m_isConnected.store( true, std::memory_order_release ); InstallCrashHandler(); HandshakeStatus handshake = HandshakeWelcome; m_sock->Send( &handshake, sizeof( handshake ) ); LZ4_resetStream( (LZ4_stream_t*)m_stream ); m_sock->Send( &welcome, sizeof( welcome ) ); m_threadCtx = 0; m_refTimeSerial = 0; m_refTimeCtx = 0; m_refTimeGpu = 0; #ifdef TRACY_ON_DEMAND OnDemandPayloadMessage onDemand; onDemand.frames = m_frameCount.load( std::memory_order_relaxed ); onDemand.currentTime = currentTime; m_sock->Send( &onDemand, sizeof( onDemand ) ); m_deferredLock.lock(); for( auto& item : m_deferredQueue ) { uint64_t ptr; uint16_t size; const auto idx = MemRead( &item.hdr.idx ); switch( (QueueType)idx ) { case QueueType::MessageAppInfo: ptr = MemRead( &item.messageFat.text ); size = MemRead( &item.messageFat.size ); SendSingleString( (const char*)ptr, size ); break; case QueueType::LockName: ptr = MemRead( &item.lockNameFat.name ); size = MemRead( &item.lockNameFat.size ); SendSingleString( (const char*)ptr, size ); break; case QueueType::GpuContextName: ptr = MemRead( &item.gpuContextNameFat.ptr ); size = MemRead( &item.gpuContextNameFat.size ); SendSingleString( (const char*)ptr, size ); break; default: break; } AppendData( &item, QueueDataSize[idx] ); } m_deferredLock.unlock(); #endif // Main communications loop int keepAlive = 0; for(;;) { ProcessSysTime(); #ifdef TRACY_HAS_SYSPOWER m_sysPower.Tick(); #endif const auto status = 
Dequeue( token ); const auto serialStatus = DequeueSerial(); if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost ) { break; } else if( status == DequeueStatus::QueueEmpty && serialStatus == DequeueStatus::QueueEmpty ) { if( ShouldExit() ) break; if( m_bufferOffset != m_bufferStart ) { if( !CommitData() ) break; } if( keepAlive == 500 ) { QueueItem ka; ka.hdr.type = QueueType::KeepAlive; AppendData( &ka, QueueDataSize[ka.hdr.idx] ); if( !CommitData() ) break; keepAlive = 0; } else if( !m_sock->HasData() ) { keepAlive++; std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); } } else { keepAlive = 0; } bool connActive = true; while( m_sock->HasData() ) { connActive = HandleServerQuery(); if( !connActive ) break; } if( !connActive ) break; } if( ShouldExit() ) break; m_isConnected.store( false, std::memory_order_release ); RemoveCrashHandler(); #ifdef TRACY_ON_DEMAND m_bufferOffset = 0; m_bufferStart = 0; #endif m_sock->~Socket(); tracy_free( m_sock ); m_sock = nullptr; #ifndef TRACY_ON_DEMAND // Client is no longer available here. Accept incoming connections, but reject handshake. for(;;) { if( ShouldExit() ) { m_shutdownFinished.store( true, std::memory_order_relaxed ); return; } ClearQueues( token ); m_sock = listen.Accept(); if( m_sock ) { char shibboleth[HandshakeShibbolethSize]; auto res = m_sock->ReadRaw( shibboleth, HandshakeShibbolethSize, 1000 ); if( !res || memcmp( shibboleth, HandshakeShibboleth, HandshakeShibbolethSize ) != 0 ) { m_sock->~Socket(); tracy_free( m_sock ); m_sock = nullptr; continue; } uint32_t protocolVersion; res = m_sock->ReadRaw( &protocolVersion, sizeof( protocolVersion ), 1000 ); if( !res ) { m_sock->~Socket(); tracy_free( m_sock ); m_sock = nullptr; continue; } HandshakeStatus status = HandshakeNotAvailable; m_sock->Send( &status, sizeof( status ) ); m_sock->~Socket(); tracy_free( m_sock ); } } #endif } // End of connections loop // Wait for symbols thread to terminate. 
Symbol resolution will continue in this thread. #ifdef TRACY_HAS_CALLSTACK while( s_symbolThreadGone.load() == false ) { YieldThread(); } #endif // Client is exiting. Send items remaining in queues. for(;;) { const auto status = Dequeue( token ); const auto serialStatus = DequeueSerial(); if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost ) { m_shutdownFinished.store( true, std::memory_order_relaxed ); return; } else if( status == DequeueStatus::QueueEmpty && serialStatus == DequeueStatus::QueueEmpty ) { if( m_bufferOffset != m_bufferStart ) CommitData(); break; } while( m_sock->HasData() ) { if( !HandleServerQuery() ) { m_shutdownFinished.store( true, std::memory_order_relaxed ); return; } } #ifdef TRACY_HAS_CALLSTACK for(;;) { auto si = m_symbolQueue.front(); if( !si ) break; HandleSymbolQueueItem( *si ); m_symbolQueue.pop(); } #endif } // Send client termination notice to the server QueueItem terminate; MemWrite( &terminate.hdr.type, QueueType::Terminate ); if( !SendData( (const char*)&terminate, 1 ) ) { m_shutdownFinished.store( true, std::memory_order_relaxed ); return; } // Handle remaining server queries for(;;) { while( m_sock->HasData() ) { if( !HandleServerQuery() ) { m_shutdownFinished.store( true, std::memory_order_relaxed ); return; } } #ifdef TRACY_HAS_CALLSTACK for(;;) { auto si = m_symbolQueue.front(); if( !si ) break; HandleSymbolQueueItem( *si ); m_symbolQueue.pop(); } #endif const auto status = Dequeue( token ); const auto serialStatus = DequeueSerial(); if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost ) { m_shutdownFinished.store( true, std::memory_order_relaxed ); return; } if( m_bufferOffset != m_bufferStart ) { if( !CommitData() ) { m_shutdownFinished.store( true, std::memory_order_relaxed ); return; } } } } #ifndef TRACY_NO_FRAME_IMAGE void Profiler::CompressWorker() { ThreadExitHandler threadExitHandler; SetThreadName( "Tracy DXT1" ); while( 
m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); #ifdef TRACY_USE_RPMALLOC rpmalloc_thread_initialize(); #endif for(;;) { const auto shouldExit = ShouldExit(); { bool lockHeld = true; while( !m_fiLock.try_lock() ) { if( m_shutdownManual.load( std::memory_order_relaxed ) ) { lockHeld = false; break; } } if( !m_fiQueue.empty() ) m_fiQueue.swap( m_fiDequeue ); if( lockHeld ) { m_fiLock.unlock(); } } const auto sz = m_fiDequeue.size(); if( sz > 0 ) { auto fi = m_fiDequeue.data(); auto end = fi + sz; while( fi != end ) { const auto w = fi->w; const auto h = fi->h; const auto csz = size_t( w * h / 2 ); auto etc1buf = (char*)tracy_malloc( csz ); CompressImageDxt1( (const char*)fi->image, etc1buf, w, h ); tracy_free( fi->image ); TracyLfqPrepare( QueueType::FrameImage ); MemWrite( &item->frameImageFat.image, (uint64_t)etc1buf ); MemWrite( &item->frameImageFat.frame, fi->frame ); MemWrite( &item->frameImageFat.w, w ); MemWrite( &item->frameImageFat.h, h ); uint8_t flip = fi->flip; MemWrite( &item->frameImageFat.flip, flip ); TracyLfqCommit; fi++; } m_fiDequeue.clear(); } else { std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) ); } if( shouldExit ) { return; } } } #endif static void FreeAssociatedMemory( const QueueItem& item ) { if( item.hdr.idx >= (int)QueueType::Terminate ) return; uint64_t ptr; switch( item.hdr.type ) { case QueueType::ZoneText: case QueueType::ZoneName: ptr = MemRead( &item.zoneTextFat.text ); tracy_free( (void*)ptr ); break; case QueueType::MessageColor: case QueueType::MessageColorCallstack: ptr = MemRead( &item.messageColorFat.text ); tracy_free( (void*)ptr ); break; case QueueType::Message: case QueueType::MessageCallstack: #ifndef TRACY_ON_DEMAND case QueueType::MessageAppInfo: #endif ptr = MemRead( &item.messageFat.text ); tracy_free( (void*)ptr ); break; case QueueType::ZoneBeginAllocSrcLoc: case QueueType::ZoneBeginAllocSrcLocCallstack: ptr = MemRead( 
&item.zoneBegin.srcloc ); tracy_free( (void*)ptr ); break; case QueueType::GpuZoneBeginAllocSrcLoc: case QueueType::GpuZoneBeginAllocSrcLocCallstack: case QueueType::GpuZoneBeginAllocSrcLocSerial: case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial: ptr = MemRead( &item.gpuZoneBegin.srcloc ); tracy_free( (void*)ptr ); break; case QueueType::CallstackSerial: case QueueType::Callstack: ptr = MemRead( &item.callstackFat.ptr ); tracy_free( (void*)ptr ); break; case QueueType::CallstackAlloc: ptr = MemRead( &item.callstackAllocFat.nativePtr ); tracy_free( (void*)ptr ); ptr = MemRead( &item.callstackAllocFat.ptr ); tracy_free( (void*)ptr ); break; case QueueType::CallstackSample: case QueueType::CallstackSampleContextSwitch: ptr = MemRead( &item.callstackSampleFat.ptr ); tracy_free( (void*)ptr ); break; case QueueType::FrameImage: ptr = MemRead( &item.frameImageFat.image ); tracy_free( (void*)ptr ); break; #ifdef TRACY_HAS_CALLSTACK case QueueType::CallstackFrameSize: { InitRpmalloc(); auto size = MemRead( &item.callstackFrameSizeFat.size ); auto data = (const CallstackEntry*)MemRead( &item.callstackFrameSizeFat.data ); for( uint8_t i=0; i( &item.symbolInformationFat.needFree ); if( needFree ) { ptr = MemRead( &item.symbolInformationFat.fileString ); tracy_free( (void*)ptr ); } break; } case QueueType::SymbolCodeMetadata: ptr = MemRead( &item.symbolCodeMetadata.ptr ); tracy_free( (void*)ptr ); break; #endif #ifndef TRACY_ON_DEMAND case QueueType::LockName: ptr = MemRead( &item.lockNameFat.name ); tracy_free( (void*)ptr ); break; case QueueType::GpuContextName: ptr = MemRead( &item.gpuContextNameFat.ptr ); tracy_free( (void*)ptr ); break; #endif #ifdef TRACY_ON_DEMAND case QueueType::MessageAppInfo: case QueueType::GpuContextName: // Don't free memory associated with deferred messages. 
break; #endif #ifdef TRACY_HAS_SYSTEM_TRACING case QueueType::ExternalNameMetadata: ptr = MemRead( &item.externalNameMetadata.name ); tracy_free( (void*)ptr ); ptr = MemRead( &item.externalNameMetadata.threadName ); tracy_free_fast( (void*)ptr ); break; #endif case QueueType::SourceCodeMetadata: ptr = MemRead( &item.sourceCodeMetadata.ptr ); tracy_free( (void*)ptr ); break; default: break; } } void Profiler::ClearQueues( moodycamel::ConsumerToken& token ) { for(;;) { const auto sz = GetQueue().try_dequeue_bulk_single( token, [](const uint64_t&){}, []( QueueItem* item, size_t sz ) { assert( sz > 0 ); while( sz-- > 0 ) FreeAssociatedMemory( *item++ ); } ); if( sz == 0 ) break; } ClearSerial(); } void Profiler::ClearSerial() { bool lockHeld = true; while( !m_serialLock.try_lock() ) { if( m_shutdownManual.load( std::memory_order_relaxed ) ) { lockHeld = false; break; } } for( auto& v : m_serialQueue ) FreeAssociatedMemory( v ); m_serialQueue.clear(); if( lockHeld ) { m_serialLock.unlock(); } for( auto& v : m_serialDequeue ) FreeAssociatedMemory( v ); m_serialDequeue.clear(); } Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) { bool connectionLost = false; const auto sz = GetQueue().try_dequeue_bulk_single( token, [this, &connectionLost] ( const uint32_t& threadId ) { if( ThreadCtxCheck( threadId ) == ThreadCtxStatus::ConnectionLost ) connectionLost = true; }, [this, &connectionLost] ( QueueItem* item, size_t sz ) { if( connectionLost ) return; InitRpmalloc(); assert( sz > 0 ); int64_t refThread = m_refTimeThread; int64_t refCtx = m_refTimeCtx; int64_t refGpu = m_refTimeGpu; while( sz-- > 0 ) { uint64_t ptr; uint16_t size; auto idx = MemRead( &item->hdr.idx ); if( idx < (int)QueueType::Terminate ) { switch( (QueueType)idx ) { case QueueType::ZoneText: case QueueType::ZoneName: ptr = MemRead( &item->zoneTextFat.text ); size = MemRead( &item->zoneTextFat.size ); SendSingleString( (const char*)ptr, size ); tracy_free_fast( (void*)ptr ); break; 
case QueueType::Message: case QueueType::MessageCallstack: ptr = MemRead( &item->messageFat.text ); size = MemRead( &item->messageFat.size ); SendSingleString( (const char*)ptr, size ); tracy_free_fast( (void*)ptr ); break; case QueueType::MessageColor: case QueueType::MessageColorCallstack: ptr = MemRead( &item->messageColorFat.text ); size = MemRead( &item->messageColorFat.size ); SendSingleString( (const char*)ptr, size ); tracy_free_fast( (void*)ptr ); break; case QueueType::MessageAppInfo: ptr = MemRead( &item->messageFat.text ); size = MemRead( &item->messageFat.size ); SendSingleString( (const char*)ptr, size ); #ifndef TRACY_ON_DEMAND tracy_free_fast( (void*)ptr ); #endif break; case QueueType::ZoneBeginAllocSrcLoc: case QueueType::ZoneBeginAllocSrcLocCallstack: { int64_t t = MemRead( &item->zoneBegin.time ); int64_t dt = t - refThread; refThread = t; MemWrite( &item->zoneBegin.time, dt ); ptr = MemRead( &item->zoneBegin.srcloc ); SendSourceLocationPayload( ptr ); tracy_free_fast( (void*)ptr ); break; } case QueueType::Callstack: ptr = MemRead( &item->callstackFat.ptr ); SendCallstackPayload( ptr ); tracy_free_fast( (void*)ptr ); break; case QueueType::CallstackAlloc: ptr = MemRead( &item->callstackAllocFat.nativePtr ); if( ptr != 0 ) { CutCallstack( (void*)ptr, "lua_pcall" ); SendCallstackPayload( ptr ); tracy_free_fast( (void*)ptr ); } ptr = MemRead( &item->callstackAllocFat.ptr ); SendCallstackAlloc( ptr ); tracy_free_fast( (void*)ptr ); break; case QueueType::CallstackSample: case QueueType::CallstackSampleContextSwitch: { ptr = MemRead( &item->callstackSampleFat.ptr ); SendCallstackPayload64( ptr ); tracy_free_fast( (void*)ptr ); int64_t t = MemRead( &item->callstackSampleFat.time ); int64_t dt = t - refCtx; refCtx = t; MemWrite( &item->callstackSampleFat.time, dt ); break; } case QueueType::FrameImage: { ptr = MemRead( &item->frameImageFat.image ); const auto w = MemRead( &item->frameImageFat.w ); const auto h = MemRead( &item->frameImageFat.h ); 
const auto csz = size_t( w * h / 2 ); SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData ); tracy_free_fast( (void*)ptr ); break; } case QueueType::ZoneBegin: case QueueType::ZoneBeginCallstack: { int64_t t = MemRead( &item->zoneBegin.time ); int64_t dt = t - refThread; refThread = t; MemWrite( &item->zoneBegin.time, dt ); break; } case QueueType::ZoneEnd: { int64_t t = MemRead( &item->zoneEnd.time ); int64_t dt = t - refThread; refThread = t; MemWrite( &item->zoneEnd.time, dt ); break; } case QueueType::GpuZoneBegin: case QueueType::GpuZoneBeginCallstack: { int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); int64_t dt = t - refThread; refThread = t; MemWrite( &item->gpuZoneBegin.cpuTime, dt ); break; } case QueueType::GpuZoneBeginAllocSrcLoc: case QueueType::GpuZoneBeginAllocSrcLocCallstack: { int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); int64_t dt = t - refThread; refThread = t; MemWrite( &item->gpuZoneBegin.cpuTime, dt ); ptr = MemRead( &item->gpuZoneBegin.srcloc ); SendSourceLocationPayload( ptr ); tracy_free_fast( (void*)ptr ); break; } case QueueType::GpuZoneEnd: { int64_t t = MemRead( &item->gpuZoneEnd.cpuTime ); int64_t dt = t - refThread; refThread = t; MemWrite( &item->gpuZoneEnd.cpuTime, dt ); break; } case QueueType::GpuContextName: ptr = MemRead( &item->gpuContextNameFat.ptr ); size = MemRead( &item->gpuContextNameFat.size ); SendSingleString( (const char*)ptr, size ); #ifndef TRACY_ON_DEMAND tracy_free_fast( (void*)ptr ); #endif break; case QueueType::PlotDataInt: case QueueType::PlotDataFloat: case QueueType::PlotDataDouble: { int64_t t = MemRead( &item->plotDataInt.time ); int64_t dt = t - refThread; refThread = t; MemWrite( &item->plotDataInt.time, dt ); break; } case QueueType::ContextSwitch: { int64_t t = MemRead( &item->contextSwitch.time ); int64_t dt = t - refCtx; refCtx = t; MemWrite( &item->contextSwitch.time, dt ); break; } case QueueType::ThreadWakeup: { int64_t t = MemRead( &item->threadWakeup.time ); int64_t dt 
= t - refCtx; refCtx = t; MemWrite( &item->threadWakeup.time, dt ); break; } case QueueType::GpuTime: { int64_t t = MemRead( &item->gpuTime.gpuTime ); int64_t dt = t - refGpu; refGpu = t; MemWrite( &item->gpuTime.gpuTime, dt ); break; } #ifdef TRACY_HAS_CALLSTACK case QueueType::CallstackFrameSize: { auto data = (const CallstackEntry*)MemRead( &item->callstackFrameSizeFat.data ); auto datasz = MemRead( &item->callstackFrameSizeFat.size ); auto imageName = (const char*)MemRead( &item->callstackFrameSizeFat.imageName ); SendSingleString( imageName ); AppendData( item++, QueueDataSize[idx] ); for( uint8_t i=0; i( &item->symbolInformationFat.fileString ); auto needFree = MemRead( &item->symbolInformationFat.needFree ); SendSingleString( fileString ); if( needFree ) tracy_free_fast( (void*)fileString ); break; } case QueueType::SymbolCodeMetadata: { auto symbol = MemRead( &item->symbolCodeMetadata.symbol ); auto ptr = (const char*)MemRead( &item->symbolCodeMetadata.ptr ); auto size = MemRead( &item->symbolCodeMetadata.size ); SendLongString( symbol, ptr, size, QueueType::SymbolCode ); tracy_free_fast( (void*)ptr ); ++item; continue; } #endif #ifdef TRACY_HAS_SYSTEM_TRACING case QueueType::ExternalNameMetadata: { auto thread = MemRead( &item->externalNameMetadata.thread ); auto name = (const char*)MemRead( &item->externalNameMetadata.name ); auto threadName = (const char*)MemRead( &item->externalNameMetadata.threadName ); SendString( thread, threadName, QueueType::ExternalThreadName ); SendString( thread, name, QueueType::ExternalName ); tracy_free_fast( (void*)threadName ); tracy_free_fast( (void*)name ); ++item; continue; } #endif case QueueType::SourceCodeMetadata: { auto ptr = (const char*)MemRead( &item->sourceCodeMetadata.ptr ); auto size = MemRead( &item->sourceCodeMetadata.size ); auto id = MemRead( &item->sourceCodeMetadata.id ); SendLongString( (uint64_t)id, ptr, size, QueueType::SourceCode ); tracy_free_fast( (void*)ptr ); ++item; continue; } default: assert( 
false ); break; } } if( !AppendData( item++, QueueDataSize[idx] ) ) { connectionLost = true; m_refTimeThread = refThread; m_refTimeCtx = refCtx; m_refTimeGpu = refGpu; return; } } m_refTimeThread = refThread; m_refTimeCtx = refCtx; m_refTimeGpu = refGpu; } ); if( connectionLost ) return DequeueStatus::ConnectionLost; return sz > 0 ? DequeueStatus::DataDequeued : DequeueStatus::QueueEmpty; } Profiler::DequeueStatus Profiler::DequeueContextSwitches( tracy::moodycamel::ConsumerToken& token, int64_t& timeStop ) { const auto sz = GetQueue().try_dequeue_bulk_single( token, [] ( const uint64_t& ) {}, [this, &timeStop] ( QueueItem* item, size_t sz ) { assert( sz > 0 ); int64_t refCtx = m_refTimeCtx; while( sz-- > 0 ) { FreeAssociatedMemory( *item ); if( timeStop < 0 ) return; const auto idx = MemRead( &item->hdr.idx ); if( idx == (uint8_t)QueueType::ContextSwitch ) { const auto csTime = MemRead( &item->contextSwitch.time ); if( csTime > timeStop ) { timeStop = -1; m_refTimeCtx = refCtx; return; } int64_t dt = csTime - refCtx; refCtx = csTime; MemWrite( &item->contextSwitch.time, dt ); if( !AppendData( item, QueueDataSize[(int)QueueType::ContextSwitch] ) ) { timeStop = -2; m_refTimeCtx = refCtx; return; } } else if( idx == (uint8_t)QueueType::ThreadWakeup ) { const auto csTime = MemRead( &item->threadWakeup.time ); if( csTime > timeStop ) { timeStop = -1; m_refTimeCtx = refCtx; return; } int64_t dt = csTime - refCtx; refCtx = csTime; MemWrite( &item->threadWakeup.time, dt ); if( !AppendData( item, QueueDataSize[(int)QueueType::ThreadWakeup] ) ) { timeStop = -2; m_refTimeCtx = refCtx; return; } } item++; } m_refTimeCtx = refCtx; } ); if( timeStop == -2 ) return DequeueStatus::ConnectionLost; return ( timeStop == -1 || sz > 0 ) ? 
DequeueStatus::DataDequeued : DequeueStatus::QueueEmpty; } #define ThreadCtxCheckSerial( _name ) \ uint32_t thread = MemRead( &item->_name.thread ); \ switch( ThreadCtxCheck( thread ) ) \ { \ case ThreadCtxStatus::Same: break; \ case ThreadCtxStatus::Changed: assert( m_refTimeThread == 0 ); refThread = 0; break; \ case ThreadCtxStatus::ConnectionLost: return DequeueStatus::ConnectionLost; \ default: assert( false ); break; \ } Profiler::DequeueStatus Profiler::DequeueSerial() { { bool lockHeld = true; while( !m_serialLock.try_lock() ) { if( m_shutdownManual.load( std::memory_order_relaxed ) ) { lockHeld = false; break; } } if( !m_serialQueue.empty() ) m_serialQueue.swap( m_serialDequeue ); if( lockHeld ) { m_serialLock.unlock(); } } const auto sz = m_serialDequeue.size(); if( sz > 0 ) { InitRpmalloc(); int64_t refSerial = m_refTimeSerial; int64_t refGpu = m_refTimeGpu; #ifdef TRACY_FIBERS int64_t refThread = m_refTimeThread; #endif auto item = m_serialDequeue.data(); auto end = item + sz; while( item != end ) { uint64_t ptr; auto idx = MemRead( &item->hdr.idx ); if( idx < (int)QueueType::Terminate ) { switch( (QueueType)idx ) { case QueueType::CallstackSerial: ptr = MemRead( &item->callstackFat.ptr ); SendCallstackPayload( ptr ); tracy_free_fast( (void*)ptr ); break; case QueueType::LockWait: case QueueType::LockSharedWait: { int64_t t = MemRead( &item->lockWait.time ); int64_t dt = t - refSerial; refSerial = t; MemWrite( &item->lockWait.time, dt ); break; } case QueueType::LockObtain: case QueueType::LockSharedObtain: { int64_t t = MemRead( &item->lockObtain.time ); int64_t dt = t - refSerial; refSerial = t; MemWrite( &item->lockObtain.time, dt ); break; } case QueueType::LockRelease: case QueueType::LockSharedRelease: { int64_t t = MemRead( &item->lockRelease.time ); int64_t dt = t - refSerial; refSerial = t; MemWrite( &item->lockRelease.time, dt ); break; } case QueueType::LockName: { ptr = MemRead( &item->lockNameFat.name ); uint16_t size = MemRead( 
&item->lockNameFat.size ); SendSingleString( (const char*)ptr, size ); #ifndef TRACY_ON_DEMAND tracy_free_fast( (void*)ptr ); #endif break; } case QueueType::MemAlloc: case QueueType::MemAllocNamed: case QueueType::MemAllocCallstack: case QueueType::MemAllocCallstackNamed: { int64_t t = MemRead( &item->memAlloc.time ); int64_t dt = t - refSerial; refSerial = t; MemWrite( &item->memAlloc.time, dt ); break; } case QueueType::MemFree: case QueueType::MemFreeNamed: case QueueType::MemFreeCallstack: case QueueType::MemFreeCallstackNamed: { int64_t t = MemRead( &item->memFree.time ); int64_t dt = t - refSerial; refSerial = t; MemWrite( &item->memFree.time, dt ); break; } case QueueType::MemDiscard: case QueueType::MemDiscardCallstack: { int64_t t = MemRead( &item->memDiscard.time ); int64_t dt = t - refSerial; refSerial = t; MemWrite( &item->memDiscard.time, dt ); break; } case QueueType::GpuZoneBeginSerial: case QueueType::GpuZoneBeginCallstackSerial: { int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); int64_t dt = t - refSerial; refSerial = t; MemWrite( &item->gpuZoneBegin.cpuTime, dt ); break; } case QueueType::GpuZoneBeginAllocSrcLocSerial: case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial: { int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); int64_t dt = t - refSerial; refSerial = t; MemWrite( &item->gpuZoneBegin.cpuTime, dt ); ptr = MemRead( &item->gpuZoneBegin.srcloc ); SendSourceLocationPayload( ptr ); tracy_free_fast( (void*)ptr ); break; } case QueueType::GpuZoneEndSerial: { int64_t t = MemRead( &item->gpuZoneEnd.cpuTime ); int64_t dt = t - refSerial; refSerial = t; MemWrite( &item->gpuZoneEnd.cpuTime, dt ); break; } case QueueType::GpuTime: { int64_t t = MemRead( &item->gpuTime.gpuTime ); int64_t dt = t - refGpu; refGpu = t; MemWrite( &item->gpuTime.gpuTime, dt ); break; } case QueueType::GpuContextName: { ptr = MemRead( &item->gpuContextNameFat.ptr ); uint16_t size = MemRead( &item->gpuContextNameFat.size ); SendSingleString( (const char*)ptr, size ); 
#ifndef TRACY_ON_DEMAND tracy_free_fast( (void*)ptr ); #endif break; } #ifdef TRACY_FIBERS case QueueType::ZoneBegin: case QueueType::ZoneBeginCallstack: { ThreadCtxCheckSerial( zoneBeginThread ); int64_t t = MemRead( &item->zoneBegin.time ); int64_t dt = t - refThread; refThread = t; MemWrite( &item->zoneBegin.time, dt ); break; } case QueueType::ZoneBeginAllocSrcLoc: case QueueType::ZoneBeginAllocSrcLocCallstack: { ThreadCtxCheckSerial( zoneBeginThread ); int64_t t = MemRead( &item->zoneBegin.time ); int64_t dt = t - refThread; refThread = t; MemWrite( &item->zoneBegin.time, dt ); ptr = MemRead( &item->zoneBegin.srcloc ); SendSourceLocationPayload( ptr ); tracy_free_fast( (void*)ptr ); break; } case QueueType::ZoneEnd: { ThreadCtxCheckSerial( zoneEndThread ); int64_t t = MemRead( &item->zoneEnd.time ); int64_t dt = t - refThread; refThread = t; MemWrite( &item->zoneEnd.time, dt ); break; } case QueueType::ZoneText: case QueueType::ZoneName: { ThreadCtxCheckSerial( zoneTextFatThread ); ptr = MemRead( &item->zoneTextFat.text ); uint16_t size = MemRead( &item->zoneTextFat.size ); SendSingleString( (const char*)ptr, size ); tracy_free_fast( (void*)ptr ); break; } case QueueType::Message: case QueueType::MessageCallstack: { ThreadCtxCheckSerial( messageFatThread ); ptr = MemRead( &item->messageFat.text ); uint16_t size = MemRead( &item->messageFat.size ); SendSingleString( (const char*)ptr, size ); tracy_free_fast( (void*)ptr ); break; } case QueueType::MessageColor: case QueueType::MessageColorCallstack: { ThreadCtxCheckSerial( messageColorFatThread ); ptr = MemRead( &item->messageColorFat.text ); uint16_t size = MemRead( &item->messageColorFat.size ); SendSingleString( (const char*)ptr, size ); tracy_free_fast( (void*)ptr ); break; } case QueueType::Callstack: { ThreadCtxCheckSerial( callstackFatThread ); ptr = MemRead( &item->callstackFat.ptr ); SendCallstackPayload( ptr ); tracy_free_fast( (void*)ptr ); break; } case QueueType::CallstackAlloc: { 
ThreadCtxCheckSerial( callstackAllocFatThread ); ptr = MemRead( &item->callstackAllocFat.nativePtr ); if( ptr != 0 ) { CutCallstack( (void*)ptr, "lua_pcall" ); SendCallstackPayload( ptr ); tracy_free_fast( (void*)ptr ); } ptr = MemRead( &item->callstackAllocFat.ptr ); SendCallstackAlloc( ptr ); tracy_free_fast( (void*)ptr ); break; } case QueueType::FiberEnter: { ThreadCtxCheckSerial( fiberEnter ); int64_t t = MemRead( &item->fiberEnter.time ); int64_t dt = t - refThread; refThread = t; MemWrite( &item->fiberEnter.time, dt ); break; } case QueueType::FiberLeave: { ThreadCtxCheckSerial( fiberLeave ); int64_t t = MemRead( &item->fiberLeave.time ); int64_t dt = t - refThread; refThread = t; MemWrite( &item->fiberLeave.time, dt ); break; } #endif default: assert( false ); break; } } #ifdef TRACY_FIBERS else { switch( (QueueType)idx ) { case QueueType::ZoneColor: { ThreadCtxCheckSerial( zoneColorThread ); break; } case QueueType::ZoneValue: { ThreadCtxCheckSerial( zoneValueThread ); break; } case QueueType::ZoneValidation: { ThreadCtxCheckSerial( zoneValidationThread ); break; } case QueueType::MessageLiteral: case QueueType::MessageLiteralCallstack: { ThreadCtxCheckSerial( messageLiteralThread ); break; } case QueueType::MessageLiteralColor: case QueueType::MessageLiteralColorCallstack: { ThreadCtxCheckSerial( messageColorLiteralThread ); break; } case QueueType::CrashReport: { ThreadCtxCheckSerial( crashReportThread ); break; } default: break; } } #endif if( !AppendData( item, QueueDataSize[idx] ) ) return DequeueStatus::ConnectionLost; item++; } m_refTimeSerial = refSerial; m_refTimeGpu = refGpu; #ifdef TRACY_FIBERS m_refTimeThread = refThread; #endif m_serialDequeue.clear(); } else { return DequeueStatus::QueueEmpty; } return DequeueStatus::DataDequeued; } Profiler::ThreadCtxStatus Profiler::ThreadCtxCheck( uint32_t threadId ) { if( m_threadCtx == threadId ) return ThreadCtxStatus::Same; QueueItem item; MemWrite( &item.hdr.type, QueueType::ThreadContext ); MemWrite( 
&item.threadCtx.thread, threadId ); if( !AppendData( &item, QueueDataSize[(int)QueueType::ThreadContext] ) ) return ThreadCtxStatus::ConnectionLost; m_threadCtx = threadId; m_refTimeThread = 0; return ThreadCtxStatus::Changed; } bool Profiler::CommitData() { bool ret = SendData( m_buffer + m_bufferStart, m_bufferOffset - m_bufferStart ); if( m_bufferOffset > TargetFrameSize * 2 ) m_bufferOffset = 0; m_bufferStart = m_bufferOffset; return ret; } char* Profiler::SafeCopyProlog( const char* data, size_t size ) { bool success = true; char* buf = m_safeSendBuffer; #ifndef NDEBUG assert( !m_inUse.exchange(true) ); #endif if( size > SafeSendBufferSize ) buf = (char*)tracy_malloc( size ); #ifdef _WIN32 __try { memcpy( buf, data, size ); } __except( 1 /*EXCEPTION_EXECUTE_HANDLER*/ ) { success = false; } #else // Send through the pipe to ensure safe reads for( size_t offset = 0; offset != size; /*in loop*/ ) { size_t sendsize = size - offset; ssize_t result1, result2; while( ( result1 = write( m_pipe[1], data + offset, sendsize ) ) < 0 && errno == EINTR ) { /* retry */ } if( result1 < 0 ) { success = false; break; } while( ( result2 = read( m_pipe[0], buf + offset, result1 ) ) < 0 && errno == EINTR ) { /* retry */ } if( result2 != result1 ) { success = false; break; } offset += result1; } #endif if( success ) return buf; SafeCopyEpilog( buf ); return nullptr; } void Profiler::SafeCopyEpilog( char* buf ) { if( buf != m_safeSendBuffer ) tracy_free( buf ); #ifndef NDEBUG m_inUse.store( false ); #endif } bool Profiler::SendData( const char* data, size_t len ) { const lz4sz_t lz4sz = LZ4_compress_fast_continue( (LZ4_stream_t*)m_stream, data, m_lz4Buf + sizeof( lz4sz_t ), (int)len, LZ4Size, 1 ); memcpy( m_lz4Buf, &lz4sz, sizeof( lz4sz ) ); return m_sock->Send( m_lz4Buf, lz4sz + sizeof( lz4sz_t ) ) != -1; } void Profiler::SendString( uint64_t str, const char* ptr, size_t len, QueueType type ) { assert( type == QueueType::StringData || type == QueueType::ThreadName || type == 
QueueType::PlotName || type == QueueType::FrameName || type == QueueType::ExternalName || type == QueueType::ExternalThreadName || type == QueueType::FiberName ); QueueItem item; MemWrite( &item.hdr.type, type ); MemWrite( &item.stringTransfer.ptr, str ); assert( len <= std::numeric_limits::max() ); auto l16 = uint16_t( len ); NeedDataSize( QueueDataSize[(int)type] + sizeof( l16 ) + l16 ); AppendDataUnsafe( &item, QueueDataSize[(int)type] ); AppendDataUnsafe( &l16, sizeof( l16 ) ); AppendDataUnsafe( ptr, l16 ); } void Profiler::SendSingleString( const char* ptr, size_t len ) { QueueItem item; MemWrite( &item.hdr.type, QueueType::SingleStringData ); assert( len <= std::numeric_limits::max() ); auto l16 = uint16_t( len ); NeedDataSize( QueueDataSize[(int)QueueType::SingleStringData] + sizeof( l16 ) + l16 ); AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SingleStringData] ); AppendDataUnsafe( &l16, sizeof( l16 ) ); AppendDataUnsafe( ptr, l16 ); } void Profiler::SendSecondString( const char* ptr, size_t len ) { QueueItem item; MemWrite( &item.hdr.type, QueueType::SecondStringData ); assert( len <= std::numeric_limits::max() ); auto l16 = uint16_t( len ); NeedDataSize( QueueDataSize[(int)QueueType::SecondStringData] + sizeof( l16 ) + l16 ); AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SecondStringData] ); AppendDataUnsafe( &l16, sizeof( l16 ) ); AppendDataUnsafe( ptr, l16 ); } void Profiler::SendLongString( uint64_t str, const char* ptr, size_t len, QueueType type ) { assert( type == QueueType::FrameImageData || type == QueueType::SymbolCode || type == QueueType::SourceCode ); QueueItem item; MemWrite( &item.hdr.type, type ); MemWrite( &item.stringTransfer.ptr, str ); assert( len <= std::numeric_limits::max() ); assert( QueueDataSize[(int)type] + sizeof( uint32_t ) + len <= TargetFrameSize ); auto l32 = uint32_t( len ); NeedDataSize( QueueDataSize[(int)type] + sizeof( l32 ) + l32 ); AppendDataUnsafe( &item, QueueDataSize[(int)type] ); AppendDataUnsafe( 
&l32, sizeof( l32 ) ); AppendDataUnsafe( ptr, l32 ); } void Profiler::SendSourceLocation( uint64_t ptr ) { auto srcloc = (const SourceLocationData*)ptr; QueueItem item; MemWrite( &item.hdr.type, QueueType::SourceLocation ); MemWrite( &item.srcloc.name, (uint64_t)srcloc->name ); MemWrite( &item.srcloc.file, (uint64_t)srcloc->file ); MemWrite( &item.srcloc.function, (uint64_t)srcloc->function ); MemWrite( &item.srcloc.line, srcloc->line ); MemWrite( &item.srcloc.b, uint8_t( ( srcloc->color ) & 0xFF ) ); MemWrite( &item.srcloc.g, uint8_t( ( srcloc->color >> 8 ) & 0xFF ) ); MemWrite( &item.srcloc.r, uint8_t( ( srcloc->color >> 16 ) & 0xFF ) ); AppendData( &item, QueueDataSize[(int)QueueType::SourceLocation] ); } void Profiler::SendSourceLocationPayload( uint64_t _ptr ) { auto ptr = (const char*)_ptr; QueueItem item; MemWrite( &item.hdr.type, QueueType::SourceLocationPayload ); MemWrite( &item.stringTransfer.ptr, _ptr ); uint16_t len; memcpy( &len, ptr, sizeof( len ) ); assert( len > 2 ); len -= 2; ptr += 2; NeedDataSize( QueueDataSize[(int)QueueType::SourceLocationPayload] + sizeof( len ) + len ); AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SourceLocationPayload] ); AppendDataUnsafe( &len, sizeof( len ) ); AppendDataUnsafe( ptr, len ); } void Profiler::SendCallstackPayload( uint64_t _ptr ) { auto ptr = (uintptr_t*)_ptr; QueueItem item; MemWrite( &item.hdr.type, QueueType::CallstackPayload ); MemWrite( &item.stringTransfer.ptr, _ptr ); const auto sz = *ptr++; const auto len = sz * sizeof( uint64_t ); const auto l16 = uint16_t( len ); NeedDataSize( QueueDataSize[(int)QueueType::CallstackPayload] + sizeof( l16 ) + l16 ); AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackPayload] ); AppendDataUnsafe( &l16, sizeof( l16 ) ); if( compile_time_condition::value ) { AppendDataUnsafe( ptr, sizeof( uint64_t ) * sz ); } else { for( uintptr_t i=0; i> 63 != 0 ) { SendSingleString( "" ); QueueItem item; MemWrite( &item.hdr.type, QueueType::SymbolInformation ); 
MemWrite( &item.symbolInformation.line, 0 );
        MemWrite( &item.symbolInformation.symAddr, symbol );
        AppendData( &item, QueueDataSize[(int)QueueType::SymbolInformation] );
    }
    else
    {
        m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::SymbolQuery, symbol } );
    }
#else
    AckServerQuery();
#endif
}

// Queues a request to resolve an external name for `ptr` on the symbol queue.
// Does nothing when system tracing support is compiled out.
void Profiler::QueueExternalName( uint64_t ptr )
{
#ifdef TRACY_HAS_SYSTEM_TRACING
    m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::ExternalName, ptr } );
#endif
}

// Queues a request for `size` bytes of code at `symbol` on the symbol queue.
// `symbol` must have its top bit set (asserted) - the same test
// HandleSymbolCodeQuery() uses to route a query here.
// Without callstack support the request is answered right away with
// AckSymbolCodeNotAvailable().
void Profiler::QueueKernelCode( uint64_t symbol, uint32_t size )
{
    assert( symbol >> 63 != 0 );
#ifdef TRACY_HAS_CALLSTACK
    m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::KernelCode, symbol, size } );
#else
    AckSymbolCodeNotAvailable();
#endif
}

// Queues a source code request identified by `id`.
// The buffers collected in m_queryData and m_queryImage are passed along as
// integer-encoded pointers and the members are reset afterwards - presumably
// the consumer of the queue item takes over ownership; verify against
// HandleSourceCodeQuery().
// Requires m_exectime to be non-zero and m_queryData to be set (both asserted).
void Profiler::QueueSourceCodeQuery( uint32_t id )
{
    assert( m_exectime != 0 );
    assert( m_queryData );
    m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::SourceCode, uint64_t( m_queryData ), uint64_t( m_queryImage ), id } );
    m_queryData = nullptr;
    m_queryImage = nullptr;
}

#ifdef TRACY_HAS_CALLSTACK
// Processes one entry taken from the symbol queue and publishes the outcome
// as an item on the lock-free queue.
void Profiler::HandleSymbolQueueItem( const SymbolQueueItem& si )
{
    switch( si.type )
    {
    case SymbolQueueItemType::CallstackFrame:
    {
        const auto frameData = DecodeCallstackPtr( si.ptr );
        // The queued item refers to a private copy of the decoded frames.
        auto data = tracy_malloc_fast( sizeof( CallstackEntry ) * frameData.size );
        memcpy( data, frameData.data, sizeof( CallstackEntry ) * frameData.size );
        TracyLfqPrepare( QueueType::CallstackFrameSize );
        MemWrite( &item->callstackFrameSizeFat.ptr, si.ptr );
        MemWrite( &item->callstackFrameSizeFat.size, frameData.size );
        MemWrite( &item->callstackFrameSizeFat.data, (uint64_t)data );
        MemWrite( &item->callstackFrameSizeFat.imageName, (uint64_t)frameData.imageName );
        TracyLfqCommit;
        break;
    }
    case SymbolQueueItemType::SymbolQuery:
    {
#ifdef __ANDROID__
        // On Android it's common for code to be in mappings that are only executable
        // but not readable.
if( !EnsureReadable( si.ptr ) ) { TracyLfqPrepare( QueueType::AckServerQueryNoop ); TracyLfqCommit; break; } #endif const auto sym = DecodeSymbolAddress( si.ptr ); TracyLfqPrepare( QueueType::SymbolInformation ); MemWrite( &item->symbolInformationFat.line, sym.line ); MemWrite( &item->symbolInformationFat.symAddr, si.ptr ); MemWrite( &item->symbolInformationFat.fileString, (uint64_t)sym.file ); MemWrite( &item->symbolInformationFat.needFree, (uint8_t)sym.needFree ); TracyLfqCommit; break; } #ifdef TRACY_HAS_SYSTEM_TRACING case SymbolQueueItemType::ExternalName: { const char* threadName; const char* name; SysTraceGetExternalName( si.ptr, threadName, name ); TracyLfqPrepare( QueueType::ExternalNameMetadata ); MemWrite( &item->externalNameMetadata.thread, si.ptr ); MemWrite( &item->externalNameMetadata.name, (uint64_t)name ); MemWrite( &item->externalNameMetadata.threadName, (uint64_t)threadName ); TracyLfqCommit; break; } #endif case SymbolQueueItemType::KernelCode: { #ifdef _WIN32 auto mod = GetKernelModulePath( si.ptr ); if( mod ) { auto fn = DecodeCallstackPtrFast( si.ptr ); if( *fn ) { auto hnd = LoadLibraryExA( mod, nullptr, DONT_RESOLVE_DLL_REFERENCES ); if( hnd ) { auto ptr = (const void*)GetProcAddress( hnd, fn ); if( ptr ) { auto buf = (char*)tracy_malloc( si.extra ); memcpy( buf, ptr, si.extra ); FreeLibrary( hnd ); TracyLfqPrepare( QueueType::SymbolCodeMetadata ); MemWrite( &item->symbolCodeMetadata.symbol, si.ptr ); MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)buf ); MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); TracyLfqCommit; break; } FreeLibrary( hnd ); } } } #elif defined __linux__ void* data = m_kcore->Retrieve( si.ptr, si.extra ); if( data ) { TracyLfqPrepare( QueueType::SymbolCodeMetadata ); MemWrite( &item->symbolCodeMetadata.symbol, si.ptr ); MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)data ); MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); TracyLfqCommit; break; } #endif TracyLfqPrepare( 
QueueType::AckSymbolCodeNotAvailable ); TracyLfqCommit; break; } case SymbolQueueItemType::SourceCode: HandleSourceCodeQuery( (char*)si.ptr, (char*)si.extra, si.id ); break; default: assert( false ); break; } } void Profiler::SymbolWorker() { #if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER s_symbolTid = syscall( SYS_gettid ); #endif ThreadExitHandler threadExitHandler; SetThreadName( "Tracy Symbol Worker" ); #ifdef TRACY_USE_RPMALLOC InitRpmalloc(); #endif InitCallstack(); while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); for(;;) { const auto shouldExit = ShouldExit(); #ifdef TRACY_ON_DEMAND if( !IsConnected() ) { if( shouldExit ) { s_symbolThreadGone.store( true, std::memory_order_release ); return; } while( m_symbolQueue.front() ) m_symbolQueue.pop(); std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) ); continue; } #endif auto si = m_symbolQueue.front(); if( si ) { HandleSymbolQueueItem( *si ); m_symbolQueue.pop(); } else { if( shouldExit ) { s_symbolThreadGone.store( true, std::memory_order_release ); return; } std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) ); } } } #endif bool Profiler::HandleServerQuery() { ServerQueryPacket payload; if( !m_sock->Read( &payload, sizeof( payload ), 10 ) ) return false; uint8_t type; uint64_t ptr; memcpy( &type, &payload.type, sizeof( payload.type ) ); memcpy( &ptr, &payload.ptr, sizeof( payload.ptr ) ); switch( type ) { case ServerQueryString: SendString( ptr, (const char*)ptr, QueueType::StringData ); break; case ServerQueryThreadString: if( ptr == m_mainThread ) { SendString( ptr, "Main thread", 11, QueueType::ThreadName ); } else { auto t = GetThreadNameData( (uint32_t)ptr ); if( t ) { SendString( ptr, t->name, QueueType::ThreadName ); if( t->groupHint != 0 ) { TracyLfqPrepare( QueueType::ThreadGroupHint ); MemWrite( &item->threadGroupHint.thread, (uint32_t)ptr ); MemWrite( &item->threadGroupHint.groupHint, 
t->groupHint ); TracyLfqCommit; } } else { SendString( ptr, GetThreadName( (uint32_t)ptr ), QueueType::ThreadName ); } } break; case ServerQuerySourceLocation: SendSourceLocation( ptr ); break; case ServerQueryPlotName: SendString( ptr, (const char*)ptr, QueueType::PlotName ); break; case ServerQueryTerminate: return false; case ServerQueryCallstackFrame: QueueCallstackFrame( ptr ); break; case ServerQueryFrameName: SendString( ptr, (const char*)ptr, QueueType::FrameName ); break; case ServerQueryDisconnect: HandleDisconnect(); return false; #ifdef TRACY_HAS_SYSTEM_TRACING case ServerQueryExternalName: QueueExternalName( ptr ); break; #endif case ServerQueryParameter: HandleParameter( ptr ); break; case ServerQuerySymbol: QueueSymbolQuery( ptr ); break; #ifndef TRACY_NO_CODE_TRANSFER case ServerQuerySymbolCode: HandleSymbolCodeQuery( ptr, payload.extra ); break; #endif case ServerQuerySourceCode: QueueSourceCodeQuery( uint32_t( ptr ) ); break; case ServerQueryDataTransfer: if( m_queryData ) { assert( !m_queryImage ); m_queryImage = m_queryData; } m_queryDataPtr = m_queryData = (char*)tracy_malloc( ptr + 11 ); AckServerQuery(); break; case ServerQueryDataTransferPart: memcpy( m_queryDataPtr, &ptr, 8 ); memcpy( m_queryDataPtr+8, &payload.extra, 4 ); m_queryDataPtr += 12; AckServerQuery(); break; #ifdef TRACY_FIBERS case ServerQueryFiberName: SendString( ptr, (const char*)ptr, QueueType::FiberName ); break; #endif default: assert( false ); break; } return true; } void Profiler::HandleDisconnect() { moodycamel::ConsumerToken token( GetQueue() ); #ifdef TRACY_HAS_SYSTEM_TRACING if( s_sysTraceThread ) { auto timestamp = GetTime(); for(;;) { const auto status = DequeueContextSwitches( token, timestamp ); if( status == DequeueStatus::ConnectionLost ) { return; } else if( status == DequeueStatus::QueueEmpty ) { if( m_bufferOffset != m_bufferStart ) { if( !CommitData() ) return; } } if( timestamp < 0 ) { if( m_bufferOffset != m_bufferStart ) { if( !CommitData() ) return; } 
break; } ClearSerial(); if( m_sock->HasData() ) { while( m_sock->HasData() ) { if( !HandleServerQuery() ) return; } if( m_bufferOffset != m_bufferStart ) { if( !CommitData() ) return; } } else { if( m_bufferOffset != m_bufferStart ) { if( !CommitData() ) return; } std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); } } } #endif QueueItem terminate; MemWrite( &terminate.hdr.type, QueueType::Terminate ); if( !SendData( (const char*)&terminate, 1 ) ) return; for(;;) { ClearQueues( token ); if( m_sock->HasData() ) { while( m_sock->HasData() ) { if( !HandleServerQuery() ) return; } if( m_bufferOffset != m_bufferStart ) { if( !CommitData() ) return; } } else { if( m_bufferOffset != m_bufferStart ) { if( !CommitData() ) return; } std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); } } } void Profiler::CalibrateTimer() { m_timerMul = 1.; #ifdef TRACY_HW_TIMER # if !defined TRACY_TIMER_QPC && defined TRACY_TIMER_FALLBACK const bool needCalibration = HardwareSupportsInvariantTSC(); # else const bool needCalibration = true; # endif if( needCalibration ) { std::atomic_signal_fence( std::memory_order_acq_rel ); const auto t0 = std::chrono::high_resolution_clock::now(); const auto r0 = GetTime(); std::atomic_signal_fence( std::memory_order_acq_rel ); std::this_thread::sleep_for( std::chrono::milliseconds( 200 ) ); std::atomic_signal_fence( std::memory_order_acq_rel ); const auto t1 = std::chrono::high_resolution_clock::now(); const auto r1 = GetTime(); std::atomic_signal_fence( std::memory_order_acq_rel ); const auto dt = std::chrono::duration_cast( t1 - t0 ).count(); const auto dr = r1 - r0; m_timerMul = double( dt ) / double( dr ); } #endif } void Profiler::CalibrateDelay() { constexpr int Iterations = 50000; auto mindiff = std::numeric_limits::max(); for( int i=0; i 0 && dti < mindiff ) mindiff = dti; } m_resolution = mindiff; #ifdef TRACY_DELAYED_INIT m_delay = m_resolution; #else constexpr int Events = Iterations * 2; // start + end static_assert( 
Events < QueuePrealloc, "Delay calibration loop will allocate memory in queue" ); static const tracy::SourceLocationData __tracy_source_location { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; const auto t0 = GetTime(); for( int i=0; izoneBegin.time, Profiler::GetTime() ); MemWrite( &item->zoneBegin.srcloc, (uint64_t)&__tracy_source_location ); TracyLfqCommit; } { TracyLfqPrepare( QueueType::ZoneEnd ); MemWrite( &item->zoneEnd.time, GetTime() ); TracyLfqCommit; } } const auto t1 = GetTime(); const auto dt = t1 - t0; m_delay = dt / Events; moodycamel::ConsumerToken token( GetQueue() ); int left = Events; while( left != 0 ) { const auto sz = GetQueue().try_dequeue_bulk_single( token, [](const uint64_t&){}, [](QueueItem* item, size_t sz){} ); assert( sz > 0 ); left -= (int)sz; } assert( GetQueue().size_approx() == 0 ); #endif } void Profiler::ReportTopology() { #ifndef TRACY_DELAYED_INIT struct CpuData { uint32_t package; uint32_t die; uint32_t core; uint32_t thread; }; #if defined _WIN32 # ifdef TRACY_UWP t_GetLogicalProcessorInformationEx _GetLogicalProcessorInformationEx = &::GetLogicalProcessorInformationEx; # else t_GetLogicalProcessorInformationEx _GetLogicalProcessorInformationEx = (t_GetLogicalProcessorInformationEx)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetLogicalProcessorInformationEx" ); # endif if( !_GetLogicalProcessorInformationEx ) return; SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* packageInfo = nullptr; SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* dieInfo = nullptr; SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* coreInfo = nullptr; DWORD psz = 0; _GetLogicalProcessorInformationEx( RelationProcessorPackage, nullptr, &psz ); if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) { packageInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( psz ); auto res = _GetLogicalProcessorInformationEx( RelationProcessorPackage, packageInfo, &psz ); assert( res ); } else { psz = 0; } DWORD dsz = 0; _GetLogicalProcessorInformationEx( 
RelationProcessorDie, nullptr, &dsz ); if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) { dieInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( dsz ); auto res = _GetLogicalProcessorInformationEx( RelationProcessorDie, dieInfo, &dsz ); assert( res ); } else { dsz = 0; } DWORD csz = 0; _GetLogicalProcessorInformationEx( RelationProcessorCore, nullptr, &csz ); if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) { coreInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( csz ); auto res = _GetLogicalProcessorInformationEx( RelationProcessorCore, coreInfo, &csz ); assert( res ); } else { csz = 0; } SYSTEM_INFO sysinfo; GetSystemInfo( &sysinfo ); const uint32_t numcpus = sysinfo.dwNumberOfProcessors; auto cpuData = (CpuData*)tracy_malloc( sizeof( CpuData ) * numcpus ); memset( cpuData, 0, sizeof( CpuData ) * numcpus ); for( uint32_t i=0; iRelationship == RelationProcessorPackage ); // FIXME account for GroupCount auto mask = ptr->Processor.GroupMask[0].Mask; int core = 0; while( mask != 0 ) { if( mask & 1 ) cpuData[core].package = idx; core++; mask >>= 1; } ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); idx++; } idx = 0; ptr = dieInfo; while( (char*)ptr < ((char*)dieInfo) + dsz ) { assert( ptr->Relationship == RelationProcessorDie ); // FIXME account for GroupCount auto mask = ptr->Processor.GroupMask[0].Mask; int core = 0; while( mask != 0 ) { if( mask & 1 ) cpuData[core].die = idx; core++; mask >>= 1; } ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); idx++; } idx = 0; ptr = coreInfo; while( (char*)ptr < ((char*)coreInfo) + csz ) { assert( ptr->Relationship == RelationProcessorCore ); // FIXME account for GroupCount auto mask = ptr->Processor.GroupMask[0].Mask; int core = 0; while( mask != 0 ) { if( mask & 1 ) cpuData[core].core = idx; core++; mask >>= 1; } ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); idx++; } for( uint32_t i=0; icpuTopology.package, data.package 
); MemWrite( &item->cpuTopology.die, data.die ); MemWrite( &item->cpuTopology.core, data.core ); MemWrite( &item->cpuTopology.thread, data.thread ); #ifdef TRACY_ON_DEMAND DeferItem( *item ); #endif TracyLfqCommit; } tracy_free( cpuData ); tracy_free( coreInfo ); tracy_free( packageInfo ); #elif defined __linux__ const int numcpus = std::thread::hardware_concurrency(); auto cpuData = (CpuData*)tracy_malloc( sizeof( CpuData ) * numcpus ); memset( cpuData, 0, sizeof( CpuData ) * numcpus ); const char* basePath = "/sys/devices/system/cpu/cpu"; for( int i=0; icpuTopology.package, data.package ); MemWrite( &item->cpuTopology.die, data.die ); MemWrite( &item->cpuTopology.core, data.core ); MemWrite( &item->cpuTopology.thread, data.thread ); #ifdef TRACY_ON_DEMAND DeferItem( *item ); #endif TracyLfqCommit; } tracy_free( cpuData ); #endif #endif } void Profiler::SendCallstack( int depth, const char* skipBefore ) { #ifdef TRACY_HAS_CALLSTACK auto ptr = Callstack( depth ); CutCallstack( ptr, skipBefore ); TracyQueuePrepare( QueueType::Callstack ); MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); TracyQueueCommit( callstackFatThread ); #endif } void Profiler::CutCallstack( void* callstack, const char* skipBefore ) { #ifdef TRACY_HAS_CALLSTACK auto data = (uintptr_t*)callstack; const auto sz = *data++; uintptr_t i; for( i=0; i 100000000 ) // 100 ms { auto sysTime = m_sysTime.Get(); if( sysTime >= 0 ) { m_sysTimeLast = t; TracyLfqPrepare( QueueType::SysTimeReport ); MemWrite( &item->sysTime.time, GetTime() ); MemWrite( &item->sysTime.sysTime, sysTime ); TracyLfqCommit; } } } #endif void Profiler::HandleParameter( uint64_t payload ) { assert( m_paramCallback ); const auto idx = uint32_t( payload >> 32 ); const auto val = int32_t( payload & 0xFFFFFFFF ); m_paramCallback( m_paramCallbackData, idx, val ); AckServerQuery(); } void Profiler::HandleSymbolCodeQuery( uint64_t symbol, uint32_t size ) { if( symbol >> 63 != 0 ) { QueueKernelCode( symbol, size ); } else { auto&& lambda = 
[ this, symbol ]( const char* buf, size_t size ) { SendLongString( symbol, buf, size, QueueType::SymbolCode ); }; // 'symbol' may have come from a module that has since unloaded, perform a safe copy before sending if( !WithSafeCopy( (const char*)symbol, size, lambda ) ) AckSymbolCodeNotAvailable(); } } void Profiler::HandleSourceCodeQuery( char* data, char* image, uint32_t id ) { bool ok = false; FILE* f = fopen( data, "rb" ); if( f ) { struct stat st; if( fstat( fileno( f ), &st ) == 0 && (uint64_t)st.st_mtime < m_exectime && st.st_size < ( TargetFrameSize - 16 ) ) { auto ptr = (char*)tracy_malloc_fast( st.st_size ); auto rd = fread( ptr, 1, st.st_size, f ); if( rd == (size_t)st.st_size ) { TracyLfqPrepare( QueueType::SourceCodeMetadata ); MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd ); MemWrite( &item->sourceCodeMetadata.id, id ); TracyLfqCommit; ok = true; } else { tracy_free_fast( ptr ); } } fclose( f ); } #ifdef TRACY_DEBUGINFOD else if( image && data[0] == '/' ) { size_t size; auto buildid = GetBuildIdForImage( image, size ); if( buildid ) { auto d = debuginfod_find_source( GetDebuginfodClient(), buildid, size, data, nullptr ); TracyDebug( "DebugInfo source query: %s, fn: %s, image: %s\n", d >= 0 ? 
" ok " : "fail", data, image ); if( d >= 0 ) { struct stat st; fstat( d, &st ); if( st.st_size < ( TargetFrameSize - 16 ) ) { lseek( d, 0, SEEK_SET ); auto ptr = (char*)tracy_malloc_fast( st.st_size ); auto rd = read( d, ptr, st.st_size ); if( rd == (size_t)st.st_size ) { TracyLfqPrepare( QueueType::SourceCodeMetadata ); MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd ); MemWrite( &item->sourceCodeMetadata.id, id ); TracyLfqCommit; ok = true; } else { tracy_free_fast( ptr ); } } close( d ); } } } else { TracyDebug( "DebugInfo invalid query fn: %s, image: %s\n", data, image ); } #endif if( !ok && m_sourceCallback ) { size_t sz; char* ptr = m_sourceCallback( m_sourceCallbackData, data, sz ); if( ptr ) { if( sz < ( TargetFrameSize - 16 ) ) { TracyLfqPrepare( QueueType::SourceCodeMetadata ); MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); MemWrite( &item->sourceCodeMetadata.size, (uint32_t)sz ); MemWrite( &item->sourceCodeMetadata.id, id ); TracyLfqCommit; ok = true; } else { tracy_free_fast( ptr ); } } } if( !ok ) { TracyLfqPrepare( QueueType::AckSourceCodeNotAvailable ); MemWrite( &item->sourceCodeNotAvailable, id ); TracyLfqCommit; } tracy_free_fast( data ); tracy_free_fast( image ); } #if defined _WIN32 && defined TRACY_TIMER_QPC int64_t Profiler::GetTimeQpc() { LARGE_INTEGER t; QueryPerformanceCounter( &t ); return t.QuadPart; } #endif } #ifdef __cplusplus extern "C" { #endif TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active ) { ___tracy_c_zone_context ctx; #ifdef TRACY_ON_DEMAND ctx.active = active && tracy::GetProfiler().IsConnected(); #else ctx.active = active; #endif if( !ctx.active ) return ctx; const auto id = tracy::GetProfiler().GetNextZoneId(); ctx.id = id; #ifndef TRACY_NO_VERIFY { TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); tracy::MemWrite( &item->zoneValidation.id, id ); TracyQueueCommitC( 
zoneValidationThread );
    }
#endif
    {
        TracyQueuePrepareC( tracy::QueueType::ZoneBegin );
        tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
        tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
        TracyQueueCommitC( zoneBeginThread );
    }
    return ctx;
}

// Begins a zone and captures a call stack of up to `depth` frames.
// With TRACY_ON_DEMAND the zone is active only if `active` is non-zero and the
// profiler is connected; otherwise `active` alone decides. An inactive zone
// returns early with only ctx.active set (ctx.id is not assigned).
// Unless TRACY_NO_VERIFY is defined, a ZoneValidation item carrying the zone id
// is queued before the call stack and the ZoneBeginCallstack item.
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active )
{
    ___tracy_c_zone_context ctx;
#ifdef TRACY_ON_DEMAND
    ctx.active = active && tracy::GetProfiler().IsConnected();
#else
    ctx.active = active;
#endif
    if( !ctx.active ) return ctx;
    const auto id = tracy::GetProfiler().GetNextZoneId();
    ctx.id = id;

#ifndef TRACY_NO_VERIFY
    {
        TracyQueuePrepareC( tracy::QueueType::ZoneValidation );
        tracy::MemWrite( &item->zoneValidation.id, id );
        TracyQueueCommitC( zoneValidationThread );
    }
#endif
    tracy::GetProfiler().SendCallstack( depth );
    {
        TracyQueuePrepareC( tracy::QueueType::ZoneBeginCallstack );
        tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
        tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
        TracyQueueCommitC( zoneBeginThread );
    }
    return ctx;
}

// Begins a zone whose source location is passed as a 64-bit value `srcloc`.
// When the zone is inactive, `srcloc` is cast to a pointer and released with
// tracy::tracy_free() before returning, so the caller must not use it afterwards.
// When active, `srcloc` is written into the ZoneBeginAllocSrcLoc item as-is.
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int active )
{
    ___tracy_c_zone_context ctx;
#ifdef TRACY_ON_DEMAND
    ctx.active = active && tracy::GetProfiler().IsConnected();
#else
    ctx.active = active;
#endif
    if( !ctx.active )
    {
        tracy::tracy_free( (void*)srcloc );
        return ctx;
    }
    const auto id = tracy::GetProfiler().GetNextZoneId();
    ctx.id = id;

#ifndef TRACY_NO_VERIFY
    {
        TracyQueuePrepareC( tracy::QueueType::ZoneValidation );
        tracy::MemWrite( &item->zoneValidation.id, id );
        TracyQueueCommitC( zoneValidationThread );
    }
#endif
    {
        TracyQueuePrepareC( tracy::QueueType::ZoneBeginAllocSrcLoc );
        tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
        tracy::MemWrite( &item->zoneBegin.srcloc, srcloc );
        TracyQueueCommitC( zoneBeginThread );
    }
    return ctx;
}

TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t
srcloc, int depth, int active )
{
    ___tracy_c_zone_context ctx;
#ifdef TRACY_ON_DEMAND
    ctx.active = active && tracy::GetProfiler().IsConnected();
#else
    ctx.active = active;
#endif
    if( !ctx.active )
    {
        tracy::tracy_free( (void*)srcloc );
        return ctx;
    }
    const auto id = tracy::GetProfiler().GetNextZoneId();
    ctx.id = id;

#ifndef TRACY_NO_VERIFY
    {
        TracyQueuePrepareC( tracy::QueueType::ZoneValidation );
        tracy::MemWrite( &item->zoneValidation.id, id );
        TracyQueueCommitC( zoneValidationThread );
    }
#endif
    tracy::GetProfiler().SendCallstack( depth );
    {
        TracyQueuePrepareC( tracy::QueueType::ZoneBeginAllocSrcLocCallstack );
        tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
        tracy::MemWrite( &item->zoneBegin.srcloc, srcloc );
        TracyQueueCommitC( zoneBeginThread );
    }
    return ctx;
}

// Ends the zone described by `ctx`. Does nothing when ctx.active is zero.
// Unless TRACY_NO_VERIFY is defined, a ZoneValidation item with ctx.id is queued
// ahead of the ZoneEnd item, which carries the current profiler time.
TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx )
{
    if( !ctx.active ) return;
#ifndef TRACY_NO_VERIFY
    {
        TracyQueuePrepareC( tracy::QueueType::ZoneValidation );
        tracy::MemWrite( &item->zoneValidation.id, ctx.id );
        TracyQueueCommitC( zoneValidationThread );
    }
#endif
    {
        TracyQueuePrepareC( tracy::QueueType::ZoneEnd );
        tracy::MemWrite( &item->zoneEnd.time, tracy::Profiler::GetTime() );
        TracyQueueCommitC( zoneEndThread );
    }
}

// Attaches `size` bytes of text from `txt` to an active zone.
// The text is copied into a buffer obtained from tracy::tracy_malloc() and the
// copy's address is queued in a ZoneText item; the buffer is not freed here.
// The size is narrowed to uint16_t when written, which the assert guards.
TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size )
{
    assert( size < std::numeric_limits::max() );
    if( !ctx.active ) return;
    auto ptr = (char*)tracy::tracy_malloc( size );
    memcpy( ptr, txt, size );
#ifndef TRACY_NO_VERIFY
    {
        TracyQueuePrepareC( tracy::QueueType::ZoneValidation );
        tracy::MemWrite( &item->zoneValidation.id, ctx.id );
        TracyQueueCommitC( zoneValidationThread );
    }
#endif
    {
        TracyQueuePrepareC( tracy::QueueType::ZoneText );
        tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
        tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size );
        TracyQueueCommitC( zoneTextFatThread );
    }
}

TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size )
{
    assert( size < std::numeric_limits::max()
);
    if( !ctx.active ) return;
    auto ptr = (char*)tracy::tracy_malloc( size );
    memcpy( ptr, txt, size );
#ifndef TRACY_NO_VERIFY
    {
        TracyQueuePrepareC( tracy::QueueType::ZoneValidation );
        tracy::MemWrite( &item->zoneValidation.id, ctx.id );
        TracyQueueCommitC( zoneValidationThread );
    }
#endif
    {
        TracyQueuePrepareC( tracy::QueueType::ZoneName );
        tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
        tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size );
        TracyQueueCommitC( zoneTextFatThread );
    }
}

// Sets the color of an active zone. `color` is split into three bytes:
// bits 0-7 go to `b`, bits 8-15 to `g`, bits 16-23 to `r`; bits 24-31 are ignored.
TRACY_API void ___tracy_emit_zone_color( TracyCZoneCtx ctx, uint32_t color )
{
    if( !ctx.active ) return;
#ifndef TRACY_NO_VERIFY
    {
        TracyQueuePrepareC( tracy::QueueType::ZoneValidation );
        tracy::MemWrite( &item->zoneValidation.id, ctx.id );
        TracyQueueCommitC( zoneValidationThread );
    }
#endif
    {
        TracyQueuePrepareC( tracy::QueueType::ZoneColor );
        tracy::MemWrite( &item->zoneColor.b, uint8_t( ( color       ) & 0xFF ) );
        tracy::MemWrite( &item->zoneColor.g, uint8_t( ( color >> 8  ) & 0xFF ) );
        tracy::MemWrite( &item->zoneColor.r, uint8_t( ( color >> 16 ) & 0xFF ) );
        TracyQueueCommitC( zoneColorThread );
    }
}

// Attaches a 64-bit numeric value to an active zone via a ZoneValue item.
TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value )
{
    if( !ctx.active ) return;
#ifndef TRACY_NO_VERIFY
    {
        TracyQueuePrepareC( tracy::QueueType::ZoneValidation );
        tracy::MemWrite( &item->zoneValidation.id, ctx.id );
        TracyQueueCommitC( zoneValidationThread );
    }
#endif
    {
        TracyQueuePrepareC( tracy::QueueType::ZoneValue );
        tracy::MemWrite( &item->zoneValue.value, value );
        TracyQueueCommitC( zoneValueThread );
    }
}

// The memory event entry points below forward to the matching tracy::Profiler
// static function; the integer `secure` flag is converted to bool with `!= 0`.
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure ) { tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); }
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure ) { tracy::Profiler::MemAllocCallstack( ptr, size, depth, secure != 0 ); }
TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure ) { tracy::Profiler::MemFree( ptr, secure != 0 ); }
TRACY_API
void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure ) { tracy::Profiler::MemFreeCallstack( ptr, depth, secure != 0 ); }

// Named-pool variants of the memory events: same forwarding as the unnamed ones,
// with `name` passed through unchanged and `secure` converted to bool with `!= 0`.
TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int secure, const char* name ) { tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); }
TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int depth, int secure, const char* name ) { tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, secure != 0, name ); }
TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int secure, const char* name ) { tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); }
TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int depth, int secure, const char* name ) { tracy::Profiler::MemFreeCallstackNamed( ptr, depth, secure != 0, name ); }

// Frame markers: all three forward to tracy::Profiler::SendFrameMark(); the
// start/end variants additionally pass FrameMarkMsgStart / FrameMarkMsgEnd.
TRACY_API void ___tracy_emit_frame_mark( const char* name ) { tracy::Profiler::SendFrameMark( name ); }
TRACY_API void ___tracy_emit_frame_mark_start( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ); }
TRACY_API void ___tracy_emit_frame_mark_end( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ); }

// Forwards a frame image and its parameters unchanged to tracy::Profiler::SendFrameImage().
TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip ) { tracy::Profiler::SendFrameImage( image, w, h, offset, flip ); }

// Plot samples: one entry point per value type (double, float, int64_t), each
// forwarding to tracy::Profiler::PlotData() with that argument type.
TRACY_API void ___tracy_emit_plot( const char* name, double val ) { tracy::Profiler::PlotData( name, val ); }
TRACY_API void ___tracy_emit_plot_float( const char* name, float val ) { tracy::Profiler::PlotData( name, val ); }
TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ) { tracy::Profiler::PlotData( name, val ); }
TRACY_API void ___tracy_emit_plot_config( const char* name, int type, int step, int fill, uint32_t color ) { tracy::Profiler::ConfigurePlot( name, tracy::PlotFormatType(type), step, fill, color
); }

// Message entry points. The plain and "C" (color) variants take an explicit
// `size`; the "L" variants take only the pointer. Each forwards its arguments
// unchanged to tracy::Profiler::Message() / MessageColor() / MessageAppInfo().
TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ) { tracy::Profiler::Message( txt, size, callstack ); }
TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ) { tracy::Profiler::Message( txt, callstack ); }
TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, size, color, callstack ); }
TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, color, callstack ); }
TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ) { tracy::Profiler::MessageAppInfo( txt, size ); }

// Returns the value produced by tracy::Profiler::AllocSourceLocation() for the
// given line, source file name, function name and color.
TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color ) {
    return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, color );
}

// Same as ___tracy_alloc_srcloc, additionally passing a zone name and its length.
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color ) {
    return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color );
}

// Queues a GpuZoneBegin item stamped with the current profiler time and the
// calling thread's handle; srcloc, queryId and context are taken from `data`.
TRACY_API void ___tracy_emit_gpu_zone_begin( const struct ___tracy_gpu_zone_begin_data data )
{
    TracyLfqPrepareC( tracy::QueueType::GpuZoneBegin );
    tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
    tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
    tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
    tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
    tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
    TracyLfqCommitC;
}

TRACY_API void ___tracy_emit_gpu_zone_begin_callstack( const struct ___tracy_gpu_zone_begin_callstack_data data )
{
    tracy::GetProfiler().SendCallstack( data.depth );
    TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginCallstack );
    tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
    tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
    tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
    tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
    tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
    TracyLfqCommitC;
}

// Queues a GpuZoneBeginAllocSrcLoc item; it fills the same gpuZoneBegin fields
// as the non-alloc variant and differs only in the queue item type.
TRACY_API void ___tracy_emit_gpu_zone_begin_alloc( const struct ___tracy_gpu_zone_begin_data data )
{
    TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginAllocSrcLoc );
    tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
    tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
    tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
    tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
    tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
    TracyLfqCommitC;
}

// Sends a call stack of up to data.depth frames first, then queues a
// GpuZoneBeginAllocSrcLocCallstack item.
TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack( const struct ___tracy_gpu_zone_begin_callstack_data data )
{
    tracy::GetProfiler().SendCallstack( data.depth );
    TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginAllocSrcLocCallstack );
    tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
    tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
    tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
    tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
    tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
    TracyLfqCommitC;
}

// Queues a GpuTime item carrying the GPU timestamp for data.queryId in data.context.
TRACY_API void ___tracy_emit_gpu_time( const struct ___tracy_gpu_time_data data )
{
    TracyLfqPrepareC( tracy::QueueType::GpuTime );
    tracy::MemWrite( &item->gpuTime.gpuTime, data.gpuTime );
    tracy::MemWrite( &item->gpuTime.queryId, data.queryId );
    tracy::MemWrite( &item->gpuTime.context, data.context );
    TracyLfqCommitC;
}

TRACY_API void ___tracy_emit_gpu_zone_end( const struct ___tracy_gpu_zone_end_data data )
{
    TracyLfqPrepareC( tracy::QueueType::GpuZoneEnd );
    tracy::MemWrite( &item->gpuZoneEnd.cpuTime, tracy::Profiler::GetTime()
);
    memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) );
    tracy::MemWrite( &item->gpuZoneEnd.queryId, data.queryId );
    tracy::MemWrite( &item->gpuZoneEnd.context, data.context );
    TracyLfqCommitC;
}

// Announces a new GPU context. The item records the current profiler time and
// calling thread together with the gpuTime, period, context, flags and type
// supplied in `data`. With TRACY_ON_DEMAND the item is also passed to
// DeferItem() before being committed.
TRACY_API void ___tracy_emit_gpu_new_context( ___tracy_gpu_new_context_data data )
{
    TracyLfqPrepareC( tracy::QueueType::GpuNewContext );
    tracy::MemWrite( &item->gpuNewContext.cpuTime, tracy::Profiler::GetTime() );
    tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() );
    tracy::MemWrite( &item->gpuNewContext.gpuTime, data.gpuTime );
    tracy::MemWrite( &item->gpuNewContext.period, data.period );
    tracy::MemWrite( &item->gpuNewContext.context, data.context );
    tracy::MemWrite( &item->gpuNewContext.flags, data.flags );
    tracy::MemWrite( &item->gpuNewContext.type, data.type );

#ifdef TRACY_ON_DEMAND
    tracy::GetProfiler().DeferItem( *item );
#endif

    TracyLfqCommitC;
}

// Names a GPU context. data.len bytes of data.name are copied into a buffer from
// tracy::tracy_malloc(); the copy's address and length are queued in a
// GpuContextName item. The buffer is not freed here.
TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data data )
{
    auto ptr = (char*)tracy::tracy_malloc( data.len );
    memcpy( ptr, data.name, data.len );

    TracyLfqPrepareC( tracy::QueueType::GpuContextName );
    tracy::MemWrite( &item->gpuContextNameFat.context, data.context );
    tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
    tracy::MemWrite( &item->gpuContextNameFat.size, data.len );

#ifdef TRACY_ON_DEMAND
    tracy::GetProfiler().DeferItem( *item );
#endif

    TracyLfqCommitC;
}

// Queues a GpuCalibration item: current profiler time plus the gpuTime, cpuDelta
// and context supplied in `data`.
TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibration_data data )
{
    TracyLfqPrepareC( tracy::QueueType::GpuCalibration );
    tracy::MemWrite( &item->gpuCalibration.cpuTime, tracy::Profiler::GetTime() );
    tracy::MemWrite( &item->gpuCalibration.gpuTime, data.gpuTime );
    tracy::MemWrite( &item->gpuCalibration.cpuDelta, data.cpuDelta );
    tracy::MemWrite( &item->gpuCalibration.context, data.context );
    TracyLfqCommitC;
}

TRACY_API void ___tracy_emit_gpu_time_sync( const struct ___tracy_gpu_time_sync_data data )
{
    TracyLfqPrepareC(
tracy::QueueType::GpuTimeSync );
    tracy::MemWrite( &item->gpuTimeSync.cpuTime, tracy::Profiler::GetTime() );
    tracy::MemWrite( &item->gpuTimeSync.gpuTime, data.gpuTime );
    tracy::MemWrite( &item->gpuTimeSync.context, data.context );
    TracyLfqCommitC;
}

// Serial-queue variant of ___tracy_emit_gpu_zone_begin: the item is obtained
// with Profiler::QueueSerial(), tagged GpuZoneBeginSerial and completed with
// Profiler::QueueSerialFinish().
TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data data )
{
    auto item = tracy::Profiler::QueueSerial();
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginSerial );
    tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
    tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
    tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
    tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
    tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
    tracy::Profiler::QueueSerialFinish();
}

// As above, but the item is obtained with QueueSerialCallstack(), which is given
// a call stack captured with tracy::Callstack( data.depth ).
TRACY_API void ___tracy_emit_gpu_zone_begin_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data data )
{
    auto item = tracy::Profiler::QueueSerialCallstack( tracy::Callstack( data.depth ) );
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginCallstackSerial );
    tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
    tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
    tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
    tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
    tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
    tracy::Profiler::QueueSerialFinish();
}

TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_serial( const struct ___tracy_gpu_zone_begin_data data )
{
    auto item = tracy::Profiler::QueueSerial();
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocSerial );
    tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
    tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
    tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
    tracy::MemWrite( &item->gpuZoneBegin.queryId,
data.queryId );
    tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
    tracy::Profiler::QueueSerialFinish();
}

// Serial-queue GPU zone begin with an allocated source location and a call
// stack captured with tracy::Callstack( data.depth ).
TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data data )
{
    auto item = tracy::Profiler::QueueSerialCallstack( tracy::Callstack( data.depth ) );
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocCallstackSerial );
    tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
    tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
    tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
    tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
    tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
    tracy::Profiler::QueueSerialFinish();
}

// Serial-queue GPU timestamp report. The item type written here is GpuTime,
// the same type the non-serial ___tracy_emit_gpu_time uses.
TRACY_API void ___tracy_emit_gpu_time_serial( const struct ___tracy_gpu_time_data data )
{
    auto item = tracy::Profiler::QueueSerial();
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTime );
    tracy::MemWrite( &item->gpuTime.gpuTime, data.gpuTime );
    tracy::MemWrite( &item->gpuTime.queryId, data.queryId );
    tracy::MemWrite( &item->gpuTime.context, data.context );
    tracy::Profiler::QueueSerialFinish();
}

// Serial-queue GPU zone end. The thread field is zero-filled rather than set to
// the calling thread's handle.
TRACY_API void ___tracy_emit_gpu_zone_end_serial( const struct ___tracy_gpu_zone_end_data data )
{
    auto item = tracy::Profiler::QueueSerial();
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneEndSerial );
    tracy::MemWrite( &item->gpuZoneEnd.cpuTime, tracy::Profiler::GetTime() );
    memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) );
    tracy::MemWrite( &item->gpuZoneEnd.queryId, data.queryId );
    tracy::MemWrite( &item->gpuZoneEnd.context, data.context );
    tracy::Profiler::QueueSerialFinish();
}

TRACY_API void ___tracy_emit_gpu_new_context_serial( ___tracy_gpu_new_context_data data )
{
    auto item = tracy::Profiler::QueueSerial();
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuNewContext );
    tracy::MemWrite( &item->gpuNewContext.cpuTime,
tracy::Profiler::GetTime() );
    tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() );
    tracy::MemWrite( &item->gpuNewContext.gpuTime, data.gpuTime );
    tracy::MemWrite( &item->gpuNewContext.period, data.period );
    tracy::MemWrite( &item->gpuNewContext.context, data.context );
    tracy::MemWrite( &item->gpuNewContext.flags, data.flags );
    tracy::MemWrite( &item->gpuNewContext.type, data.type );
    tracy::Profiler::QueueSerialFinish();
}

// Serial-queue variant of GPU context naming. The name is copied into a buffer
// from tracy::tracy_malloc() before the serial queue item is obtained; the
// buffer's address and length are written into the item and it is not freed here.
TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data data )
{
    auto ptr = (char*)tracy::tracy_malloc( data.len );
    memcpy( ptr, data.name, data.len );

    auto item = tracy::Profiler::QueueSerial();
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuContextName );
    tracy::MemWrite( &item->gpuContextNameFat.context, data.context );
    tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
    tracy::MemWrite( &item->gpuContextNameFat.size, data.len );
    tracy::Profiler::QueueSerialFinish();
}

// Serial-queue GPU calibration: current profiler time plus gpuTime, cpuDelta and
// context from `data`.
TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data data )
{
    auto item = tracy::Profiler::QueueSerial();
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuCalibration );
    tracy::MemWrite( &item->gpuCalibration.cpuTime, tracy::Profiler::GetTime() );
    tracy::MemWrite( &item->gpuCalibration.gpuTime, data.gpuTime );
    tracy::MemWrite( &item->gpuCalibration.cpuDelta, data.cpuDelta );
    tracy::MemWrite( &item->gpuCalibration.context, data.context );
    tracy::Profiler::QueueSerialFinish();
}

// Serial-queue GPU time sync: current profiler time plus gpuTime and context
// from `data`.
TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data data )
{
    auto item = tracy::Profiler::QueueSerial();
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTimeSync );
    tracy::MemWrite( &item->gpuTimeSync.cpuTime, tracy::Profiler::GetTime() );
    tracy::MemWrite( &item->gpuTimeSync.gpuTime, data.gpuTime );
    tracy::MemWrite( &item->gpuTimeSync.context, data.context );
    tracy::Profiler::QueueSerialFinish();
}

struct
__tracy_lockable_context_data
{
    // Lock identifier, taken from the global lock counter when the lock is announced.
    uint32_t m_id;
#ifdef TRACY_ON_DEMAND
    // Incremented before lock / after successful try-lock, decremented after unlock.
    std::atomic m_lockCount;
    // Caches whether lock events are currently being sent for this lock.
    std::atomic m_active;
#endif
};

// Allocates and announces a new lockable context.
// The structure is obtained from tracy::tracy_malloc(), so the atomics used with
// TRACY_ON_DEMAND are constructed in place with placement new. A LockAnnounce
// item (id, time, source location, LockType::Lockable) is written to the serial
// queue. Returns the new context; it is released by ___tracy_terminate_lockable_ctx().
TRACY_API struct __tracy_lockable_context_data* ___tracy_announce_lockable_ctx( const struct ___tracy_source_location_data* srcloc )
{
    struct __tracy_lockable_context_data *lockdata = (__tracy_lockable_context_data*)tracy::tracy_malloc( sizeof( __tracy_lockable_context_data ) );
    lockdata->m_id =tracy:: GetLockCounter().fetch_add( 1, std::memory_order_relaxed );
#ifdef TRACY_ON_DEMAND
    new(&lockdata->m_lockCount) std::atomic( 0 );
    new(&lockdata->m_active) std::atomic( false );
#endif
    assert( lockdata->m_id != (std::numeric_limits::max)() );

    auto item = tracy::Profiler::QueueSerial();
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockAnnounce );
    tracy::MemWrite( &item->lockAnnounce.id, lockdata->m_id );
    tracy::MemWrite( &item->lockAnnounce.time, tracy::Profiler::GetTime() );
    tracy::MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
    tracy::MemWrite( &item->lockAnnounce.type, tracy::LockType::Lockable );
#ifdef TRACY_ON_DEMAND
    tracy::GetProfiler().DeferItem( *item );
#endif
    tracy::Profiler::QueueSerialFinish();

    return lockdata;
}

// Writes a LockTerminate item for the lock and then destroys the context:
// with TRACY_ON_DEMAND the atomics are destroyed explicitly (they were created
// with placement new), and the storage is returned with tracy::tracy_free().
// `lockdata` must not be used after this call.
TRACY_API void ___tracy_terminate_lockable_ctx( struct __tracy_lockable_context_data* lockdata )
{
    auto item = tracy::Profiler::QueueSerial();
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockTerminate );
    tracy::MemWrite( &item->lockTerminate.id, lockdata->m_id );
    tracy::MemWrite( &item->lockTerminate.time, tracy::Profiler::GetTime() );
#ifdef TRACY_ON_DEMAND
    tracy::GetProfiler().DeferItem( *item );
#endif
    tracy::Profiler::QueueSerialFinish();

#ifdef TRACY_ON_DEMAND
    lockdata->m_lockCount.~atomic();
    lockdata->m_active.~atomic();
#endif
    tracy::tracy_free((void*)lockdata);
}

TRACY_API int ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata )
{
#ifdef TRACY_ON_DEMAND
    bool queue = false;
    const auto locks = lockdata->m_lockCount.fetch_add( 1, std::memory_order_relaxed );
    const
auto active = lockdata->m_active.load( std::memory_order_relaxed ); if( locks == 0 || active ) { const bool connected = tracy::GetProfiler().IsConnected(); if( active != connected ) lockdata->m_active.store( connected, std::memory_order_relaxed ); if( connected ) queue = true; } if( !queue ) return false; #endif auto item = tracy::Profiler::QueueSerial(); tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockWait ); tracy::MemWrite( &item->lockWait.thread, tracy::GetThreadHandle() ); tracy::MemWrite( &item->lockWait.id, lockdata->m_id ); tracy::MemWrite( &item->lockWait.time, tracy::Profiler::GetTime() ); tracy::Profiler::QueueSerialFinish(); return true; } TRACY_API void ___tracy_after_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) { auto item = tracy::Profiler::QueueSerial(); tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockObtain ); tracy::MemWrite( &item->lockObtain.thread, tracy::GetThreadHandle() ); tracy::MemWrite( &item->lockObtain.id, lockdata->m_id ); tracy::MemWrite( &item->lockObtain.time, tracy::Profiler::GetTime() ); tracy::Profiler::QueueSerialFinish(); } TRACY_API void ___tracy_after_unlock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) { #ifdef TRACY_ON_DEMAND lockdata->m_lockCount.fetch_sub( 1, std::memory_order_relaxed ); if( !lockdata->m_active.load( std::memory_order_relaxed ) ) return; if( !tracy::GetProfiler().IsConnected() ) { lockdata->m_active.store( false, std::memory_order_relaxed ); return; } #endif auto item = tracy::Profiler::QueueSerial(); tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockRelease ); tracy::MemWrite( &item->lockRelease.id, lockdata->m_id ); tracy::MemWrite( &item->lockRelease.time, tracy::Profiler::GetTime() ); tracy::Profiler::QueueSerialFinish(); } TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int acquired ) { #ifdef TRACY_ON_DEMAND if( !acquired ) return; bool queue = false; const auto locks = 
lockdata->m_lockCount.fetch_add( 1, std::memory_order_relaxed ); const auto active = lockdata->m_active.load( std::memory_order_relaxed ); if( locks == 0 || active ) { const bool connected = tracy::GetProfiler().IsConnected(); if( active != connected ) lockdata->m_active.store( connected, std::memory_order_relaxed ); if( connected ) queue = true; } if( !queue ) return; #endif if( acquired ) { auto item = tracy::Profiler::QueueSerial(); tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockObtain ); tracy::MemWrite( &item->lockObtain.thread, tracy::GetThreadHandle() ); tracy::MemWrite( &item->lockObtain.id, lockdata->m_id ); tracy::MemWrite( &item->lockObtain.time, tracy::Profiler::GetTime() ); tracy::Profiler::QueueSerialFinish(); } } TRACY_API void ___tracy_mark_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc ) { #ifdef TRACY_ON_DEMAND const auto active = lockdata->m_active.load( std::memory_order_relaxed ); if( !active ) return; const auto connected = tracy::GetProfiler().IsConnected(); if( !connected ) { if( active ) lockdata->m_active.store( false, std::memory_order_relaxed ); return; } #endif auto item = tracy::Profiler::QueueSerial(); tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockMark ); tracy::MemWrite( &item->lockMark.thread, tracy::GetThreadHandle() ); tracy::MemWrite( &item->lockMark.id, lockdata->m_id ); tracy::MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc ); tracy::Profiler::QueueSerialFinish(); } TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz ) { assert( nameSz < (std::numeric_limits::max)() ); auto ptr = (char*)tracy::tracy_malloc( nameSz ); memcpy( ptr, name, nameSz ); auto item = tracy::Profiler::QueueSerial(); tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockName ); tracy::MemWrite( &item->lockNameFat.id, lockdata->m_id ); tracy::MemWrite( &item->lockNameFat.name, (uint64_t)ptr 
);
    tracy::MemWrite( &item->lockNameFat.size, (uint16_t)nameSz );
#ifdef TRACY_ON_DEMAND
    tracy::GetProfiler().DeferItem( *item );
#endif
    tracy::Profiler::QueueSerialFinish();
}

// Returns the result of tracy::GetProfiler().IsConnected() converted to int.
TRACY_API int ___tracy_connected( void )
{
    return tracy::GetProfiler().IsConnected();
}

#ifdef TRACY_FIBERS
// Fiber switching: enter forwards the fiber name to Profiler::EnterFiber() with
// a second argument of 0; leave forwards to Profiler::LeaveFiber().
TRACY_API void ___tracy_fiber_enter( const char* fiber ){ tracy::Profiler::EnterFiber( fiber, 0 ); }
TRACY_API void ___tracy_fiber_leave( void ){ tracy::Profiler::LeaveFiber(); }
#endif

# ifdef TRACY_MANUAL_LIFETIME
// Manual lifetime control, only available when TRACY_MANUAL_LIFETIME is defined.
TRACY_API void ___tracy_startup_profiler( void )
{
    tracy::StartupProfiler();
}

TRACY_API void ___tracy_shutdown_profiler( void )
{
    tracy::ShutdownProfiler();
}

// Returns the current value of tracy::s_isProfilerStarted, read with
// sequentially consistent ordering.
TRACY_API int ___tracy_profiler_started( void )
{
    return tracy::s_isProfilerStarted.load( std::memory_order_seq_cst );
}
# endif

#ifdef __cplusplus
}
#endif

#endif