#ifndef __TRACYWORKER_HPP__
#define __TRACYWORKER_HPP__

// NOTE(review): the eleven directives below carry no header name in this listing;
// they are reproduced exactly as found.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "../public/common/TracyForceInline.hpp"
#include "../public/common/TracyQueue.hpp"
#include "../public/common/TracyProtocol.hpp"
#include "../public/common/TracySocket.hpp"
#include "tracy_robin_hood.h"
#include "TracyEvent.hpp"
#include "TracyShortPtr.hpp"
#include "TracySlab.hpp"
#include "TracyStringDiscovery.hpp"
#include "TracyTextureCompression.hpp"
#include "TracyThreadCompress.hpp"
#include "TracyVarArray.hpp"

namespace tracy
{

class FileRead;
class FileWrite;

// Categories of trace data, one bit per category so that values can be OR-ed into a mask.
// `None` is the empty mask; `All` is the largest value of the limits type.
// The Worker( FileRead& ) constructor takes such a mask as `eventMask` (default: All).
namespace EventType
{
    enum Type : uint32_t
    {
        Locks = 1 << 0,
        Messages = 1 << 1,
        Plots = 1 << 2,
        Memory = 1 << 3,
        FrameImages = 1 << 4,
        ContextSwitches = 1 << 5,
        Samples = 1 << 6,
        SymbolCode = 1 << 7,
        SourceCache = 1 << 8,
        None = 0,
        All = std::numeric_limits::max()
    };
}

// Exception carrying an integer version number.
// NOTE(review): presumably thrown when a trace file is newer than this reader supports -- confirm at the throw site.
struct UnsupportedVersion : public std::exception
{
    UnsupportedVersion( int version ) : version( version ) {}
    int version;
};

// Exception carrying an integer version number.
// NOTE(review): presumably thrown when a trace file is too old to be read -- confirm at the throw site.
struct LegacyVersion : public std::exception
{
    LegacyVersion( int version ) : version ( version ) {}
    int version;
};

// Four atomic counters, all starting at zero, plus the list of stage names.
// A single instance is handed out read-only by Worker::GetLoadProgress().
// NOTE(review): `progress` presumably holds a Stage value and `total` the number of stages -- confirm.
struct LoadProgress
{
    enum Stage
    {
        Initialization,
        Locks,
        Messages,
        Zones,
        GpuZones,
        Plots,
        Memory,
        CallStacks,
        FrameImages,
        ContextSwitches,
        ContextSwitchesPerCpu
    };

    LoadProgress() : total( 0 ), progress( 0 ), subTotal( 0 ), subProgress( 0 ) {}

    std::atomic total;
    std::atomic progress;
    std::atomic subTotal;
    std::atomic subProgress;
};

class Worker
{
public:
    // Plain records describing imported data.
    // NOTE(review): presumably the element types of the vectors taken by the importing constructor -- confirm.
    struct ImportEventTimeline
    {
        uint64_t tid;
        uint64_t timestamp;
        std::string name;
        std::string text;
        bool isEnd;
        std::string locFile;
        uint32_t locLine;
    };

    struct ImportEventMessages
    {
        uint64_t tid;
        uint64_t timestamp;
        std::string message;
    };

    struct ImportEventPlots
    {
        std::string name;
        PlotValueFormatting format;
        std::vector> data;
    };

    // A zone pointer and a 16-bit thread id sharing one 64-bit word (`_zone_thread`).
    struct ZoneThreadData
    {
        // The pointer is read back from everything above the low 16 bits.
        tracy_force_inline ZoneEvent* Zone() const { return (ZoneEvent*)( _zone_thread >> 16 ); }
        tracy_force_inline void SetZone(
ZoneEvent* zone ) { assert( ( uint64_t( zone ) & 0xFFFF000000000000 ) == 0 ); memcpy( ((char*)&_zone_thread)+2, &zone, 4 ); memcpy( ((char*)&_zone_thread)+6, ((char*)&zone)+4, 2 ); } tracy_force_inline uint16_t Thread() const { return uint16_t( _zone_thread & 0xFFFF ); } tracy_force_inline void SetThread( uint16_t thread ) { memcpy( &_zone_thread, &thread, 2 ); } uint64_t _zone_thread; }; enum { ZoneThreadDataSize = sizeof( ZoneThreadData ) }; struct GpuZoneThreadData { tracy_force_inline GpuEvent* Zone() const { return (GpuEvent*)( _zone_thread >> 16 ); } tracy_force_inline void SetZone( GpuEvent* zone ) { assert( ( uint64_t( zone ) & 0xFFFF000000000000 ) == 0 ); memcpy( ((char*)&_zone_thread)+2, &zone, 4 ); memcpy( ((char*)&_zone_thread)+6, ((char*)&zone)+4, 2 ); } tracy_force_inline uint16_t Thread() const { return uint16_t( _zone_thread & 0xFFFF ); } tracy_force_inline void SetThread( uint16_t thread ) { memcpy( &_zone_thread, &thread, 2 ); } uint64_t _zone_thread; }; enum { GpuZoneThreadDataSize = sizeof( GpuZoneThreadData ) }; struct CpuThreadTopology { uint32_t package; uint32_t core; }; struct MemoryBlock { const char* data; uint32_t len; }; struct InlineStackData { uint64_t symAddr; CallstackFrameId frame; uint8_t inlineFrame; }; struct PowerData { int64_t lastTime; PlotData* plot; }; #pragma pack( push, 1 ) struct GhostKey { CallstackFrameId frame; uint8_t inlineFrame; }; #pragma pack( pop ) struct GhostKeyHasher { size_t operator()( const GhostKey& key ) const { return charutil::hash( (const char*)&key, sizeof( GhostKey ) ); } }; struct GhostKeyComparator { bool operator()( const GhostKey& lhs, const GhostKey& rhs ) const { return memcmp( &lhs, &rhs, sizeof( GhostKey ) ) == 0; } }; private: struct SourceLocationZones { struct ZtdSort { bool operator()( const ZoneThreadData& lhs, const ZoneThreadData& rhs ) { return lhs.Zone()->Start() < rhs.Zone()->Start(); } }; SortedVector zones; int64_t min = std::numeric_limits::max(); int64_t max = 
std::numeric_limits::min(); int64_t total = 0; double sumSq = 0; int64_t selfMin = std::numeric_limits::max(); int64_t selfMax = std::numeric_limits::min(); int64_t selfTotal = 0; size_t nonReentrantCount = 0; int64_t nonReentrantMin = std::numeric_limits::max(); int64_t nonReentrantMax = std::numeric_limits::min(); int64_t nonReentrantTotal = 0; }; struct GpuSourceLocationZones { struct GpuZtdSort { bool operator()( const GpuZoneThreadData& lhs, const GpuZoneThreadData& rhs ) { return lhs.Zone()->GpuStart() < rhs.Zone()->GpuStart(); } }; SortedVector zones; int64_t min = std::numeric_limits::max(); int64_t max = std::numeric_limits::min(); int64_t total = 0; double sumSq = 0; }; struct CallstackFrameIdHash { size_t operator()( const CallstackFrameId& id ) const { return id.data; } }; struct CallstackFrameIdCompare { bool operator()( const CallstackFrameId& lhs, const CallstackFrameId& rhs ) const { return lhs.data == rhs.data; } }; struct RevFrameHash { size_t operator()( const CallstackFrameData* data ) const { size_t hash = data->size; for( uint8_t i=0; isize; i++ ) { const auto& v = data->data[i]; hash = ( ( hash << 5 ) + hash ) ^ size_t( v.line ); hash = ( ( hash << 5 ) + hash ) ^ size_t( v.file.Idx() ); hash = ( ( hash << 5 ) + hash ) ^ size_t( v.name.Idx() ); } return hash; } }; struct RevFrameComp { bool operator()( const CallstackFrameData* lhs, const CallstackFrameData* rhs ) const { if( lhs->size != rhs->size ) return false; for( uint8_t i=0; isize; i++ ) { if( memcmp( lhs->data + i, rhs->data + i, sizeof( CallstackFrameBasic ) ) != 0 ) return false; } return true; } }; struct SymbolPending { StringIdx name; StringIdx imageName; StringIdx file; uint32_t line; uint32_t size; bool isInline; }; struct DataBlock { std::mutex lock; StringDiscovery frames; FrameData* framesBase; Vector gpuData; Vector> messages; StringDiscovery plots; Vector threads; Vector zoneExtra; MemData* memory; unordered_flat_map memNameMap; uint64_t zonesCnt = 0; uint64_t gpuCnt = 0; 
uint64_t samplesCnt = 0; uint64_t ghostCnt = 0; int64_t baseTime = 0; int64_t lastTime = 0; uint64_t frameOffset = 0; CpuArchitecture cpuArch = CpuArchUnknown; uint32_t cpuId = 0; char cpuManufacturer[13]; unordered_flat_map strings; Vector stringData; unordered_flat_map stringMap; unordered_flat_map threadNames; unordered_flat_map> externalNames; unordered_flat_map sourceLocation; Vector> sourceLocationPayload; unordered_flat_map sourceLocationPayloadMap; Vector sourceLocationExpand; #ifndef TRACY_NO_STATISTICS unordered_flat_map sourceLocationZones; bool sourceLocationZonesReady = false; unordered_flat_map gpuSourceLocationZones; bool gpuSourceLocationZonesReady = false; #else unordered_flat_map sourceLocationZonesCnt; unordered_flat_map gpuSourceLocationZonesCnt; #endif unordered_flat_map*, uint32_t, VarArrayHasher, VarArrayComparator> callstackMap; Vector>> callstackPayload; unordered_flat_map callstackFrameMap; unordered_flat_map revFrameMap; unordered_flat_map symbolMap; unordered_flat_map symbolStats; Vector symbolLoc; Vector symbolLocInline; int64_t newSymbolsIndex = -1; int64_t newInlineSymbolsIndex = -1; unordered_flat_map codeSymbolMap; #ifndef TRACY_NO_STATISTICS unordered_flat_map*, uint32_t, VarArrayHasher, VarArrayComparator> parentCallstackMap; Vector>> parentCallstackPayload; unordered_flat_map parentCallstackFrameMap; unordered_flat_map revParentFrameMap; unordered_flat_map postponedSamples; unordered_flat_map pendingInstructionPointers; unordered_flat_map> instructionPointersMap; unordered_flat_map> symbolSamples; unordered_flat_map, CallstackFrameIdHash, CallstackFrameIdCompare> pendingSymbolSamples; unordered_flat_map> childSamples; bool newFramesWereReceived = false; bool callstackSamplesReady = false; bool newContextSwitchesReceived = false; bool ghostZonesReady = false; bool ghostZonesPostponed = false; bool symbolSamplesReady = false; #endif unordered_flat_map lockMap; ThreadCompress localThreadCompress; ThreadCompress 
externalThreadCompress; Vector>> zoneChildren; Vector>> gpuChildren; #ifndef TRACY_NO_STATISTICS Vector> ghostChildren; Vector ghostFrames; unordered_flat_map ghostFramesMap; #endif Vector>> zoneVectorCache; Vector> frameImage; Vector appInfo; CrashEvent crashEvent; unordered_flat_map ctxSwitch; CpuData cpuData[256]; int cpuDataCount = 0; unordered_flat_map tidToPid; unordered_flat_map cpuThreadData; std::pair threadDataLast = std::make_pair( std::numeric_limits::max(), nullptr ); std::pair ctxSwitchLast = std::make_pair( std::numeric_limits::max(), nullptr ); uint64_t checkSrclocLast = 0; std::pair shrinkSrclocLast = std::make_pair( std::numeric_limits::max(), 0 ); #ifndef TRACY_NO_STATISTICS std::pair srclocZonesLast = std::make_pair( 0, nullptr ); std::pair gpuZonesLast = std::make_pair( 0, nullptr ); #else std::pair srclocCntLast = std::make_pair( 0, nullptr ); std::pair gpuCntLast = std::make_pair( 0, nullptr ); #endif #ifndef TRACY_NO_STATISTICS Vector ctxUsage; bool ctxUsageReady = false; #endif unordered_flat_map>> cpuTopology; unordered_flat_map cpuTopologyMap; unordered_flat_map symbolCode; uint64_t symbolCodeSize = 0; unordered_flat_map sourceFileCache; unordered_flat_map hwSamples; bool hasBranchRetirement = false; unordered_flat_map fiberToThreadMap; }; struct MbpsBlock { MbpsBlock() : mbps( 64 ), compRatio( 1.0 ), queue( 0 ), transferred( 0 ) {} std::shared_mutex lock; std::vector mbps; float compRatio; size_t queue; uint64_t transferred; }; struct FailureData { uint64_t thread; int16_t srcloc; uint32_t callstack; std::string message; }; struct FrameImagePending { const char* image; uint32_t csz; }; public: enum class Failure { None, ZoneStack, ZoneDoubleEnd, ZoneText, ZoneValue, ZoneColor, ZoneName, MemFree, MemAllocTwice, FrameEnd, FrameImageIndex, FrameImageTwice, FiberLeave, NUM_FAILURES }; Worker( const char* addr, uint16_t port ); Worker( const char* name, const char* program, const std::vector& timeline, const std::vector& messages, const 
std::vector& plots, const std::unordered_map& threadNames ); Worker( FileRead& f, EventType::Type eventMask = EventType::All, bool bgTasks = true ); ~Worker(); const std::string& GetAddr() const { return m_addr; } uint16_t GetPort() const { return m_port; } const std::string& GetCaptureName() const { return m_captureName; } const std::string& GetCaptureProgram() const { return m_captureProgram; } uint64_t GetCaptureTime() const { return m_captureTime; } uint64_t GetExecutableTime() const { return m_executableTime; } const std::string& GetHostInfo() const { return m_hostInfo; } int64_t GetDelay() const { return m_delay; } int64_t GetResolution() const { return m_resolution; } uint64_t GetPid() const { return m_pid; }; CpuArchitecture GetCpuArch() const { return m_data.cpuArch; } uint32_t GetCpuId() const { return m_data.cpuId; } const char* GetCpuManufacturer() const { return m_data.cpuManufacturer; } std::mutex& GetDataLock() { return m_data.lock; } size_t GetFrameCount( const FrameData& fd ) const { return fd.frames.size(); } size_t GetFullFrameCount( const FrameData& fd ) const; bool AreFramesUsed() const; int64_t GetFirstTime() const; int64_t GetLastTime() const { return m_data.lastTime; } uint64_t GetZoneCount() const { return m_data.zonesCnt; } uint64_t GetZoneExtraCount() const { return m_data.zoneExtra.size() - 1; } uint64_t GetGpuZoneCount() const { return m_data.gpuCnt; } uint64_t GetLockCount() const; uint64_t GetPlotCount() const; uint64_t GetTracyPlotCount() const; uint64_t GetContextSwitchCount() const; uint64_t GetContextSwitchPerCpuCount() const; bool HasContextSwitches() const { return !m_data.ctxSwitch.empty(); } uint64_t GetSrcLocCount() const { return m_data.sourceLocationPayload.size() + m_data.sourceLocation.size(); } uint64_t GetCallstackPayloadCount() const { return m_data.callstackPayload.size() - 1; } #ifndef TRACY_NO_STATISTICS uint64_t GetCallstackParentPayloadCount() const { return m_data.parentCallstackPayload.size(); } uint64_t 
GetCallstackParentFrameCount() const { return m_callstackParentNextIdx; } #endif uint64_t GetCallstackFrameCount() const { return m_data.callstackFrameMap.size(); } uint64_t GetCallstackSampleCount() const { return m_data.samplesCnt; } uint64_t GetSymbolsCount() const { return m_data.symbolMap.size(); } uint64_t GetSymbolCodeCount() const { return m_data.symbolCode.size(); } uint64_t GetSymbolCodeSize() const { return m_data.symbolCodeSize; } uint64_t GetGhostZonesCount() const { return m_data.ghostCnt; } uint32_t GetFrameImageCount() const { return (uint32_t)m_data.frameImage.size(); } uint64_t GetStringsCount() const { return m_data.strings.size() + m_data.stringData.size(); } uint64_t GetHwSampleCountAddress() const { return m_data.hwSamples.size(); } uint64_t GetHwSampleCount() const; bool HasHwBranchRetirement() const { return m_data.hasBranchRetirement; } #ifndef TRACY_NO_STATISTICS uint64_t GetChildSamplesCountSyms() const { return m_data.childSamples.size(); } uint64_t GetChildSamplesCountFull() const; uint64_t GetContextSwitchSampleCount() const; #endif uint64_t GetFrameOffset() const { return m_data.frameOffset; } const FrameData* GetFramesBase() const { return m_data.framesBase; } const Vector& GetFrames() const { return m_data.frames.Data(); } const ContextSwitch* const GetContextSwitchData( uint64_t thread ) { if( m_data.ctxSwitchLast.first == thread ) return m_data.ctxSwitchLast.second; return GetContextSwitchDataImpl( thread ); } const CpuData* GetCpuData() const { return m_data.cpuData; } int GetCpuDataCpuCount() const { return m_data.cpuDataCount; } uint64_t GetPidFromTid( uint64_t tid ) const; const unordered_flat_map& GetCpuThreadData() const { return m_data.cpuThreadData; } const unordered_flat_map& GetSourceFileCache() const { return m_data.sourceFileCache; } uint64_t GetSourceFileCacheCount() const { return m_data.sourceFileCache.size(); } uint64_t GetSourceFileCacheSize() const; MemoryBlock GetSourceFileFromCache( const char* file ) const; 
HwSampleData* GetHwSampleData( uint64_t addr ); int64_t GetFrameTime( const FrameData& fd, size_t idx ) const; int64_t GetFrameBegin( const FrameData& fd, size_t idx ) const; int64_t GetFrameEnd( const FrameData& fd, size_t idx ) const; const FrameImage* GetFrameImage( const FrameData& fd, size_t idx ) const; std::pair GetFrameRange( const FrameData& fd, int64_t from, int64_t to ); const unordered_flat_map& GetLockMap() const { return m_data.lockMap; } const Vector>& GetMessages() const { return m_data.messages; } const Vector& GetGpuData() const { return m_data.gpuData; } const Vector& GetPlots() const { return m_data.plots.Data(); } const Vector& GetThreadData() const { return m_data.threads; } const ThreadData* GetThreadData( uint64_t tid ) const; const MemData& GetMemoryNamed( uint64_t name ) const; const unordered_flat_map& GetMemNameMap() const { return m_data.memNameMap; } const Vector>& GetFrameImages() const { return m_data.frameImage; } const Vector& GetAppInfo() const { return m_data.appInfo; } const VarArray& GetCallstack( uint32_t idx ) const { return *m_data.callstackPayload[idx]; } const CallstackFrameData* GetCallstackFrame( const CallstackFrameId& ptr ) const; CallstackFrameId PackPointer( uint64_t ptr ) const; uint64_t GetCanonicalPointer( const CallstackFrameId& id ) const; const SymbolData* GetSymbolData( uint64_t sym ) const; bool HasSymbolCode( uint64_t sym ) const; const char* GetSymbolCode( uint64_t sym, uint32_t& len ) const; uint64_t GetSymbolForAddress( uint64_t address ); uint64_t GetSymbolForAddress( uint64_t address, uint32_t& offset ); uint64_t GetInlineSymbolForAddress( uint64_t address ) const; bool HasInlineSymbolAddresses() const { return !m_data.codeSymbolMap.empty(); } StringIdx GetLocationForAddress( uint64_t address, uint32_t& line ) const; const uint64_t* GetInlineSymbolList( uint64_t sym, uint32_t len ); #ifndef TRACY_NO_STATISTICS const VarArray& GetParentCallstack( uint32_t idx ) const { return 
*m_data.parentCallstackPayload[idx]; } const CallstackFrameData* GetParentCallstackFrame( const CallstackFrameId& ptr ) const; const Vector* GetSamplesForSymbol( uint64_t symAddr ) const; const Vector* GetChildSamples( uint64_t addr ) const; #endif const CrashEvent& GetCrashEvent() const { return m_data.crashEvent; } // Some zones may have incomplete timing data (only start time is available, end hasn't arrived yet). // GetZoneEnd() will try to infer the end time by looking at child zones (parent zone can't end // before its children have ended). // GetZoneEndDirect() will only return zone's direct timing data, without looking at children. tracy_force_inline int64_t GetZoneEnd( const ZoneEvent& ev ) { return ev.IsEndValid() ? ev.End() : GetZoneEndImpl( ev ); } tracy_force_inline int64_t GetZoneEnd( const GpuEvent& ev ) { return ev.GpuEnd() >= 0 ? ev.GpuEnd() : GetZoneEndImpl( ev ); } static tracy_force_inline int64_t GetZoneEndDirect( const ZoneEvent& ev ) { return ev.IsEndValid() ? ev.End() : ev.Start(); } static tracy_force_inline int64_t GetZoneEndDirect( const GpuEvent& ev ) { return ev.GpuEnd() >= 0 ? 
ev.GpuEnd() : ev.GpuStart(); } uint32_t FindStringIdx( const char* str ) const; const char* GetString( uint64_t ptr ) const; const char* GetString( const StringRef& ref ) const; const char* GetString( const StringIdx& idx ) const; const char* GetThreadName( uint64_t id ) const; bool IsThreadLocal( uint64_t id ); bool IsThreadFiber( uint64_t id ); const SourceLocation& GetSourceLocation( int16_t srcloc ) const; std::pair GetExternalName( uint64_t id ) const; const char* GetZoneName( const SourceLocation& srcloc ) const; const char* GetZoneName( const ZoneEvent& ev ) const; const char* GetZoneName( const ZoneEvent& ev, const SourceLocation& srcloc ) const; const char* GetZoneName( const GpuEvent& ev ) const; tracy_force_inline const Vector>& GetZoneChildren( int32_t idx ) const { return m_data.zoneChildren[idx]; } tracy_force_inline const Vector>& GetGpuChildren( int32_t idx ) const { return m_data.gpuChildren[idx]; } #ifndef TRACY_NO_STATISTICS tracy_force_inline const Vector& GetGhostChildren( int32_t idx ) const { return m_data.ghostChildren[idx]; } tracy_force_inline const GhostKey& GetGhostFrame( const Int24& frame ) const { return m_data.ghostFrames[frame.Val()]; } #endif tracy_force_inline const bool HasZoneExtra( const ZoneEvent& ev ) const { return ev.extra != 0; } tracy_force_inline const ZoneExtra& GetZoneExtra( const ZoneEvent& ev ) const { return m_data.zoneExtra[ev.extra]; } std::vector GetMatchingSourceLocation( const char* query, bool ignoreCase ) const; const unordered_flat_map& GetSymbolMap() const { return m_data.symbolMap; } #ifndef TRACY_NO_STATISTICS SourceLocationZones& GetZonesForSourceLocation( int16_t srcloc ); const SourceLocationZones& GetZonesForSourceLocation( int16_t srcloc ) const; const unordered_flat_map& GetSourceLocationZones() const { return m_data.sourceLocationZones; } const unordered_flat_map& GetGpuSourceLocationZones() const { return m_data.gpuSourceLocationZones; } bool AreSourceLocationZonesReady() const { return 
m_data.sourceLocationZonesReady; } bool AreGpuSourceLocationZonesReady() const { return m_data.gpuSourceLocationZonesReady; } bool IsCpuUsageReady() const { return m_data.ctxUsageReady; } const Vector& GetCpuUsage() const { return m_data.ctxUsage; } const unordered_flat_map& GetSymbolStats() const { return m_data.symbolStats; } const SymbolStats* GetSymbolStats( uint64_t symAddr ) const; const unordered_flat_map* GetSymbolInstructionPointers( uint64_t symAddr ) const; bool AreCallstackSamplesReady() const { return m_data.callstackSamplesReady; } bool AreGhostZonesReady() const { return m_data.ghostZonesReady; } bool AreSymbolSamplesReady() const { return m_data.symbolSamplesReady; } #endif tracy_force_inline uint16_t CompressThread( uint64_t thread ) { return m_data.localThreadCompress.CompressThread( thread ); } tracy_force_inline uint64_t DecompressThread( uint16_t thread ) const { return m_data.localThreadCompress.DecompressThread( thread ); } tracy_force_inline uint64_t DecompressThreadExternal( uint16_t thread ) const { return m_data.externalThreadCompress.DecompressThread( thread ); } std::shared_mutex& GetMbpsDataLock() { return m_mbpsData.lock; } const std::vector& GetMbpsData() const { return m_mbpsData.mbps; } float GetCompRatio() const { return m_mbpsData.compRatio; } size_t GetSendQueueSize() const { return m_mbpsData.queue; } size_t GetSendInFlight() const { return m_serverQuerySpaceBase - m_serverQuerySpaceLeft; } uint64_t GetDataTransferred() const { return m_mbpsData.transferred; } bool HasData() const { return m_hasData.load( std::memory_order_acquire ); } bool IsConnected() const { return m_connected.load( std::memory_order_relaxed ); } bool IsDataStatic() const { return !m_thread.joinable(); } bool IsBackgroundDone() const { return m_backgroundDone.load( std::memory_order_relaxed ); } bool IsOnDemand() const { return m_onDemand; } void Shutdown() { m_shutdown.store( true, std::memory_order_relaxed ); } void Disconnect(); bool 
WasDisconnectIssued() const { return m_disconnect; } void Write( FileWrite& f, bool fiDict ); int GetTraceVersion() const { return m_traceVersion; } uint8_t GetHandshakeStatus() const { return m_handshake.load( std::memory_order_relaxed ); } int64_t GetSamplingPeriod() const { return m_samplingPeriod; } bool AreSamplesInconsistent() const { return m_inconsistentSamples; } static const LoadProgress& GetLoadProgress() { return s_loadProgress; } int64_t GetLoadTime() const { return m_loadTime; } void ClearFailure() { m_failure = Failure::None; } Failure GetFailureType() const { return m_failure; } const FailureData& GetFailureData() const { return m_failureData; } static const char* GetFailureString( Failure failure ); const char* UnpackFrameImage( const FrameImage& image ) { return m_texcomp.Unpack( image ); } const Vector& GetParameters() const { return m_params; } void SetParameter( size_t paramIdx, int32_t val ); const decltype(DataBlock::cpuTopology)& GetCpuTopology() const { return m_data.cpuTopology; } const CpuThreadTopology* GetThreadTopology( uint32_t cpuThread ) const; std::pair GetTextureCompressionBytes() const { return std::make_pair( m_texcomp.GetInputBytesCount(), m_texcomp.GetOutputBytesCount() ); } void DoPostponedSymbols(); void DoPostponedInlineSymbols(); void DoPostponedWork(); void DoPostponedWorkAll(); void CacheSourceFiles(); private: void Network(); void Exec(); void Query( ServerQuery type, uint64_t data, uint32_t extra = 0 ); void QueryTerminate(); void QuerySourceFile( const char* fn, const char* image ); void QueryDataTransfer( const void* ptr, size_t size ); tracy_force_inline bool DispatchProcess( const QueueItem& ev, const char*& ptr ); tracy_force_inline bool Process( const QueueItem& ev ); tracy_force_inline void ProcessThreadContext( const QueueThreadContext& ev ); tracy_force_inline void ProcessZoneBegin( const QueueZoneBegin& ev ); tracy_force_inline void ProcessZoneBeginCallstack( const QueueZoneBegin& ev ); tracy_force_inline 
void ProcessZoneBeginAllocSrcLoc( const QueueZoneBeginLean& ev ); tracy_force_inline void ProcessZoneBeginAllocSrcLocCallstack( const QueueZoneBeginLean& ev ); tracy_force_inline void ProcessZoneEnd( const QueueZoneEnd& ev ); tracy_force_inline void ProcessZoneValidation( const QueueZoneValidation& ev ); tracy_force_inline void ProcessFrameMark( const QueueFrameMark& ev ); tracy_force_inline void ProcessFrameMarkStart( const QueueFrameMark& ev ); tracy_force_inline void ProcessFrameMarkEnd( const QueueFrameMark& ev ); tracy_force_inline void ProcessFrameVsync( const QueueFrameVsync& ev ); tracy_force_inline void ProcessFrameImage( const QueueFrameImage& ev ); tracy_force_inline void ProcessZoneText(); tracy_force_inline void ProcessZoneName(); tracy_force_inline void ProcessZoneColor( const QueueZoneColor& ev ); tracy_force_inline void ProcessZoneValue( const QueueZoneValue& ev ); tracy_force_inline void ProcessLockAnnounce( const QueueLockAnnounce& ev ); tracy_force_inline void ProcessLockTerminate( const QueueLockTerminate& ev ); tracy_force_inline void ProcessLockWait( const QueueLockWait& ev ); tracy_force_inline void ProcessLockObtain( const QueueLockObtain& ev ); tracy_force_inline void ProcessLockRelease( const QueueLockRelease& ev ); tracy_force_inline void ProcessLockSharedWait( const QueueLockWait& ev ); tracy_force_inline void ProcessLockSharedObtain( const QueueLockObtain& ev ); tracy_force_inline void ProcessLockSharedRelease( const QueueLockReleaseShared& ev ); tracy_force_inline void ProcessLockMark( const QueueLockMark& ev ); tracy_force_inline void ProcessLockName( const QueueLockName& ev ); tracy_force_inline void ProcessPlotDataInt( const QueuePlotDataInt& ev ); tracy_force_inline void ProcessPlotDataFloat( const QueuePlotDataFloat& ev ); tracy_force_inline void ProcessPlotDataDouble( const QueuePlotDataDouble& ev ); tracy_force_inline void ProcessPlotConfig( const QueuePlotConfig& ev ); tracy_force_inline void ProcessMessage( const QueueMessage& 
ev ); tracy_force_inline void ProcessMessageLiteral( const QueueMessageLiteral& ev ); tracy_force_inline void ProcessMessageColor( const QueueMessageColor& ev ); tracy_force_inline void ProcessMessageLiteralColor( const QueueMessageColorLiteral& ev ); tracy_force_inline void ProcessMessageCallstack( const QueueMessage& ev ); tracy_force_inline void ProcessMessageLiteralCallstack( const QueueMessageLiteral& ev ); tracy_force_inline void ProcessMessageColorCallstack( const QueueMessageColor& ev ); tracy_force_inline void ProcessMessageLiteralColorCallstack( const QueueMessageColorLiteral& ev ); tracy_force_inline void ProcessMessageAppInfo( const QueueMessage& ev ); tracy_force_inline void ProcessGpuNewContext( const QueueGpuNewContext& ev ); tracy_force_inline void ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev, bool serial ); tracy_force_inline void ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool serial ); tracy_force_inline void ProcessGpuZoneBeginAllocSrcLoc( const QueueGpuZoneBeginLean& ev, bool serial ); tracy_force_inline void ProcessGpuZoneBeginAllocSrcLocCallstack( const QueueGpuZoneBeginLean& ev, bool serial ); tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial ); tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev ); tracy_force_inline void ProcessGpuCalibration( const QueueGpuCalibration& ev ); tracy_force_inline void ProcessGpuContextName( const QueueGpuContextName& ev ); tracy_force_inline MemEvent* ProcessMemAlloc( const QueueMemAlloc& ev ); tracy_force_inline MemEvent* ProcessMemAllocNamed( const QueueMemAlloc& ev ); tracy_force_inline MemEvent* ProcessMemFree( const QueueMemFree& ev ); tracy_force_inline MemEvent* ProcessMemFreeNamed( const QueueMemFree& ev ); tracy_force_inline void ProcessMemAllocCallstack( const QueueMemAlloc& ev ); tracy_force_inline void ProcessMemAllocCallstackNamed( const QueueMemAlloc& ev ); tracy_force_inline void ProcessMemFreeCallstack( const QueueMemFree& ev ); 
tracy_force_inline void ProcessMemFreeCallstackNamed( const QueueMemFree& ev ); tracy_force_inline void ProcessCallstackSerial(); tracy_force_inline void ProcessCallstack(); tracy_force_inline void ProcessCallstackSample( const QueueCallstackSample& ev ); tracy_force_inline void ProcessCallstackSampleContextSwitch( const QueueCallstackSample& ev ); tracy_force_inline void ProcessCallstackFrameSize( const QueueCallstackFrameSize& ev ); tracy_force_inline void ProcessCallstackFrame( const QueueCallstackFrame& ev, bool querySymbols ); tracy_force_inline void ProcessSymbolInformation( const QueueSymbolInformation& ev ); tracy_force_inline void ProcessCrashReport( const QueueCrashReport& ev ); tracy_force_inline void ProcessSysTime( const QueueSysTime& ev ); tracy_force_inline void ProcessSysPower( const QueueSysPower& ev ); tracy_force_inline void ProcessContextSwitch( const QueueContextSwitch& ev ); tracy_force_inline void ProcessThreadWakeup( const QueueThreadWakeup& ev ); tracy_force_inline void ProcessTidToPid( const QueueTidToPid& ev ); tracy_force_inline void ProcessHwSampleCpuCycle( const QueueHwSample& ev ); tracy_force_inline void ProcessHwSampleInstructionRetired( const QueueHwSample& ev ); tracy_force_inline void ProcessHwSampleCacheReference( const QueueHwSample& ev ); tracy_force_inline void ProcessHwSampleCacheMiss( const QueueHwSample& ev ); tracy_force_inline void ProcessHwSampleBranchRetired( const QueueHwSample& ev ); tracy_force_inline void ProcessHwSampleBranchMiss( const QueueHwSample& ev ); tracy_force_inline void ProcessParamSetup( const QueueParamSetup& ev ); tracy_force_inline void ProcessSourceCodeNotAvailable( const QueueSourceCodeNotAvailable& ev ); tracy_force_inline void ProcessCpuTopology( const QueueCpuTopology& ev ); tracy_force_inline void ProcessMemNamePayload( const QueueMemNamePayload& ev ); tracy_force_inline void ProcessFiberEnter( const QueueFiberEnter& ev ); tracy_force_inline void ProcessFiberLeave( const QueueFiberLeave& ev ); 
tracy_force_inline ZoneEvent* AllocZoneEvent(); tracy_force_inline void ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev ); tracy_force_inline void ProcessZoneBeginAllocSrcLocImpl( ZoneEvent* zone, const QueueZoneBeginLean& ev ); tracy_force_inline void ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& ev, bool serial ); tracy_force_inline void ProcessGpuZoneBeginAllocSrcLocImpl( GpuEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial ); tracy_force_inline void ProcessGpuZoneBeginImplCommon( GpuEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial ); tracy_force_inline void ProcessPlotDataImpl( uint64_t name, int64_t evTime, double val ); tracy_force_inline MemEvent* ProcessMemAllocImpl( MemData& memdata, const QueueMemAlloc& ev ); tracy_force_inline MemEvent* ProcessMemFreeImpl( MemData& memdata, const QueueMemFree& ev ); tracy_force_inline void ProcessCallstackSampleImpl( const SampleData& sd, ThreadData& td ); tracy_force_inline void ProcessCallstackSampleInsertSample( const SampleData& sd, ThreadData& td ); #ifndef TRACY_NO_STATISTICS tracy_force_inline void ProcessCallstackSampleImplStats( const SampleData& sd, ThreadData& td ); #endif void ZoneStackFailure( uint64_t thread, const ZoneEvent* ev ); void ZoneDoubleEndFailure( uint64_t thread, const ZoneEvent* ev ); void ZoneTextFailure( uint64_t thread, const char* text ); void ZoneValueFailure( uint64_t thread, uint64_t value ); void ZoneColorFailure( uint64_t thread ); void ZoneNameFailure( uint64_t thread ); void MemFreeFailure( uint64_t thread ); void MemAllocTwiceFailure( uint64_t thread ); void FrameEndFailure(); void FrameImageIndexFailure(); void FrameImageTwiceFailure(); void FiberLeaveFailure(); tracy_force_inline void CheckSourceLocation( uint64_t ptr ); void NewSourceLocation( uint64_t ptr ); tracy_force_inline int16_t ShrinkSourceLocation( uint64_t srcloc ) { if( m_data.shrinkSrclocLast.first == srcloc ) return m_data.shrinkSrclocLast.second; return 
ShrinkSourceLocationReal( srcloc ); }
    int16_t ShrinkSourceLocationReal( uint64_t srcloc );
    int16_t NewShrinkedSourceLocation( uint64_t srcloc );

    tracy_force_inline void MemAllocChanged( MemData& memdata, int64_t time );
    void CreateMemAllocPlot( MemData& memdata );
    void ReconstructMemAllocPlot( MemData& memdata );

    void InsertMessageData( MessageData* msg );

    ThreadData* NoticeThreadReal( uint64_t thread );
    ThreadData* NewThread( uint64_t thread, bool fiber );
    // Returns the pointer remembered in m_data.threadDataLast when it was stored for this
    // thread id; any other id goes to NoticeThreadReal().
    // NOTE(review): the remembered pair is presumably refreshed inside the *Real() functions -- confirm.
    tracy_force_inline ThreadData* NoticeThread( uint64_t thread )
    {
        if( m_data.threadDataLast.first == thread ) return m_data.threadDataLast.second;
        return NoticeThreadReal( thread );
    }
    ThreadData* RetrieveThreadReal( uint64_t thread );
    // Same shortcut as NoticeThread(), with RetrieveThreadReal() as the fallback.
    tracy_force_inline ThreadData* RetrieveThread( uint64_t thread )
    {
        if( m_data.threadDataLast.first == thread ) return m_data.threadDataLast.second;
        return RetrieveThreadReal( thread );
    }
    tracy_force_inline ThreadData* GetCurrentThreadData();

#ifndef TRACY_NO_STATISTICS
    // Each helper below first compares against a single remembered (id, pointer) pair in
    // m_data and falls back to the matching *Real() function on a mismatch.
    // NOTE(review): these pairs are initialised to ( 0, nullptr ) in DataBlock, so a first
    // request for id 0 would return nullptr -- presumably id 0 is never requested; confirm.
    SourceLocationZones* GetSourceLocationZones( uint16_t srcloc )
    {
        if( m_data.srclocZonesLast.first == srcloc ) return m_data.srclocZonesLast.second;
        return GetSourceLocationZonesReal( srcloc );
    }
    SourceLocationZones* GetSourceLocationZonesReal( uint16_t srcloc );
    GpuSourceLocationZones* GetGpuSourceLocationZones( uint16_t srcloc )
    {
        if( m_data.gpuZonesLast.first == srcloc ) return m_data.gpuZonesLast.second;
        return GetGpuSourceLocationZonesReal( srcloc );
    }
    GpuSourceLocationZones* GetGpuSourceLocationZonesReal( uint16_t srcloc );
#else
    // Without statistics the same one-entry shortcut hands out a pointer to a counter
    // instead of a zone list. The ( 0, nullptr ) note above applies here as well.
    uint64_t* GetSourceLocationZonesCnt( uint16_t srcloc )
    {
        if( m_data.srclocCntLast.first == srcloc ) return m_data.srclocCntLast.second;
        return GetSourceLocationZonesCntReal( srcloc );
    }
    uint64_t* GetSourceLocationZonesCntReal( uint16_t srcloc );
    uint64_t* GetGpuSourceLocationZonesCnt( uint16_t srcloc )
    {
        if( m_data.gpuCntLast.first == srcloc ) return m_data.gpuCntLast.second;
        return GetGpuSourceLocationZonesCntReal( srcloc );
    }
    uint64_t* GetGpuSourceLocationZonesCntReal( uint16_t srcloc
); #endif tracy_force_inline void NewZone( ZoneEvent* zone ); void InsertLockEvent( LockMap& lockmap, LockEvent* lev, uint64_t thread, int64_t time ); bool CheckString( uint64_t ptr ); void CheckThreadString( uint64_t id ); void CheckFiberName( uint64_t id, uint64_t tid ); void CheckExternalName( uint64_t id ); void AddSourceLocation( const QueueSourceLocation& srcloc ); void AddSourceLocationPayload( const char* data, size_t sz ); void AddString( uint64_t ptr, const char* str, size_t sz ); void AddThreadString( uint64_t id, const char* str, size_t sz ); void AddFiberName( uint64_t id, const char* str, size_t sz ); void AddSingleString( const char* str, size_t sz ); void AddSingleStringFailure( const char* str, size_t sz ); void AddSecondString( const char* str, size_t sz ); void AddExternalName( uint64_t ptr, const char* str, size_t sz ); void AddExternalThreadName( uint64_t ptr, const char* str, size_t sz ); void AddFrameImageData( const char* data, size_t sz ); void AddSymbolCode( uint64_t ptr, const char* data, size_t sz ); void AddSourceCode( uint32_t id, const char* data, size_t sz ); tracy_force_inline void AddCallstackPayload( const char* data, size_t sz ); tracy_force_inline void AddCallstackAllocPayload( const char* data ); uint32_t MergeCallstacks( uint32_t first, uint32_t second ); void InsertPlot( PlotData* plot, int64_t time, double val ); void HandlePlotName( uint64_t name, const char* str, size_t sz ); void HandleFrameName( uint64_t name, const char* str, size_t sz ); void HandlePostponedSamples(); void HandlePostponedGhostZones(); bool IsFailureThreadStringRetrieved(); bool IsSourceLocationRetrieved( int16_t srcloc ); bool IsCallstackRetrieved( uint32_t callstack ); bool HasAllFailureData(); void HandleFailure( const char* ptr, const char* end ); void DispatchFailure( const QueueItem& ev, const char*& ptr ); uint32_t GetSingleStringIdx(); uint32_t GetSecondStringIdx(); StringLocation StoreString( const char* str, size_t sz ); const ContextSwitch* 
const GetContextSwitchDataImpl( uint64_t thread );

    // `image` defaults to a default-constructed StringIdx.
    void CacheSource( const StringRef& str, const StringIdx& image = StringIdx() );
    void CacheSourceFromFile( const char* fn );

    // NOTE(review): several template argument lists in this copy of the header appear to be
    // missing (e.g. `Vector>&`, bare `Vector&` / `VarArray&`); the declarations are kept
    // exactly as found -- restore the arguments from the canonical header.

    // Mutable accessors: each returns a reference to element `idx` of the corresponding
    // m_data container. The accessors themselves perform no range check on `idx`.
    tracy_force_inline Vector>& GetZoneChildrenMutable( int32_t idx ) { return m_data.zoneChildren[idx]; }
    tracy_force_inline Vector>& GetGpuChildrenMutable( int32_t idx ) { return m_data.gpuChildren[idx]; }
#ifndef TRACY_NO_STATISTICS
    // Available only when TRACY_NO_STATISTICS is not defined.
    tracy_force_inline Vector& GetGhostChildrenMutable( int32_t idx ) { return m_data.ghostChildren[idx]; }
#endif

#ifndef TRACY_NO_STATISTICS
    // Statistics-only declarations (not compiled when TRACY_NO_STATISTICS is defined).
    void ReconstructContextSwitchUsage();
    bool UpdateSampleStatistics( uint32_t callstack, uint32_t count, bool canPostpone );
    // Takes, by reference, an iterator of the type returned by DataBlock::postponedSamples.begin().
    void UpdateSampleStatisticsPostponed( decltype(Worker::DataBlock::postponedSamples.begin())& it );
    void UpdateSampleStatisticsImpl( const CallstackFrameData** frames, uint16_t framesCount, uint32_t count, const VarArray& cs );
    // Output is delivered through the `ret` reference parameter.
    tracy_force_inline void GetStackWithInlines( Vector& ret, const VarArray& cs );
    tracy_force_inline int AddGhostZone( const VarArray& cs, Vector* vec, uint64_t t );
#endif

    // Timeline readers for a single zone.
    // The ZoneEvent overloads take `refTime` by value and return an int64_t; the GpuEvent
    // overloads take `refTime` / `refGpuTime` by reference and return nothing.
    // The *HaveSize variants additionally receive a size `sz` (uint32_t for ZoneEvent,
    // uint64_t for GpuEvent).
    tracy_force_inline int64_t ReadTimeline( FileRead& f, ZoneEvent* zone, int64_t refTime, int32_t& childIdx );
    tracy_force_inline int64_t ReadTimelineHaveSize( FileRead& f, ZoneEvent* zone, int64_t refTime, int32_t& childIdx, uint32_t sz );
    tracy_force_inline void ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx );
    tracy_force_inline void ReadTimelineHaveSize( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, uint64_t sz );

#ifndef TRACY_NO_STATISTICS
    // Statistics build: overloads for ZoneEvent (with a caller-provided `countMap`) and GpuEvent.
    tracy_force_inline void ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, uint16_t thread );
    tracy_force_inline void ReconstructZoneStatistics( GpuEvent& zone, uint16_t thread );
#else
    // TRACY_NO_STATISTICS build: CountZoneStatistics() overloads are declared instead.
    tracy_force_inline void CountZoneStatistics( ZoneEvent* zone );
    tracy_force_inline void CountZoneStatistics( GpuEvent* zone );
#endif

    // Returns a mutable reference to m_data.zoneExtra[ev.extra] (parameter list and body
    // continue on the next line).
    tracy_force_inline ZoneExtra& GetZoneExtraMutable( const
ZoneEvent& ev ) { return m_data.zoneExtra[ev.extra]; } tracy_force_inline ZoneExtra& AllocZoneExtra( ZoneEvent& ev ); tracy_force_inline ZoneExtra& RequestZoneExtra( ZoneEvent& ev ); int64_t GetZoneEndImpl( const ZoneEvent& ev ); int64_t GetZoneEndImpl( const GpuEvent& ev ); void UpdateMbps( int64_t td ); int64_t ReadTimeline( FileRead& f, Vector>& vec, uint32_t size, int64_t refTime, int32_t& childIdx ); void ReadTimeline( FileRead& f, Vector>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx ); tracy_force_inline void WriteTimeline( FileWrite& f, const Vector>& vec, int64_t& refTime ); tracy_force_inline void WriteTimeline( FileWrite& f, const Vector>& vec, int64_t& refTime, int64_t& refGpuTime ); template void WriteTimelineImpl( FileWrite& f, const V& vec, int64_t& refTime ); template void WriteTimelineImpl( FileWrite& f, const V& vec, int64_t& refTime, int64_t& refGpuTime ); int64_t TscTime( int64_t tsc ) { return int64_t( ( tsc - m_data.baseTime ) * m_timerMul ); } int64_t TscTime( uint64_t tsc ) { return int64_t( ( tsc - m_data.baseTime ) * m_timerMul ); } int64_t TscPeriod( uint64_t tsc ) { return int64_t( tsc * m_timerMul ); } Socket m_sock; std::string m_addr; uint16_t m_port; std::thread m_thread; std::thread m_threadNet; std::atomic m_connected { false }; std::atomic m_hasData; std::atomic m_shutdown { false }; std::atomic m_backgroundDone { true }; std::thread m_threadBackground; int64_t m_delay; int64_t m_resolution; double m_timerMul; std::string m_captureName; std::string m_captureProgram; uint64_t m_captureTime; uint64_t m_executableTime; std::string m_hostInfo; uint64_t m_pid; int64_t m_samplingPeriod; bool m_terminate = false; bool m_crashed = false; bool m_disconnect = false; void* m_stream; // LZ4_streamDecode_t* char* m_buffer; int m_bufferOffset; bool m_onDemand; bool m_ignoreMemFreeFaults; bool m_codeTransfer; bool m_combineSamples; bool m_identifySamples = false; bool m_inconsistentSamples; short_ptr 
m_gpuCtxMap[256];

    // NOTE(review): several template argument lists in this copy of the header appear to be
    // missing (bare `Vector`, `unordered_flat_map`, `unordered_flat_set`, `std::atomic`,
    // `std::vector`); the declarations are kept exactly as found -- restore the arguments
    // from the canonical header.

    uint32_t m_pendingCallstackId = 0;
    int16_t m_pendingSourceLocationPayload = 0;
    Vector m_sourceLocationQueue;
    unordered_flat_map m_sourceLocationShrink;
    unordered_flat_map m_threadMap;
    unordered_flat_map m_vsyncFrameMap;
    FrameImagePending m_pendingFrameImageData = {};
    unordered_flat_map m_pendingSymbols;
    unordered_flat_set m_pendingFileStrings;
    unordered_flat_set m_checkedFileStrings;
    StringLocation m_pendingSingleString = {};
    StringLocation m_pendingSecondString = {};

    // The following m_pending* fields have no default member initializer here.
    // NOTE(review): presumably they are set by the constructors -- confirm.
    uint32_t m_pendingStrings;
    uint32_t m_pendingThreads;
    uint32_t m_pendingFibers;
    uint32_t m_pendingExternalNames;
    uint32_t m_pendingSourceLocation;
    uint32_t m_pendingCallstackFrames;
    uint8_t m_pendingCallstackSubframes;
    uint32_t m_pendingSymbolCode;

    CallstackFrameData* m_callstackFrameStaging;
    uint64_t m_callstackFrameStagingPtr;
    uint64_t m_callstackAllocNextIdx = 0;
    uint64_t m_callstackParentNextIdx = 0;
    uint32_t m_serialNextCallstack = 0;

    uint64_t m_memNamePayload = 0;

    Slab<64*1024*1024> m_slab;

    // Referenced by the inline helpers of this class through m_data.<field>.
    DataBlock m_data;
    MbpsBlock m_mbpsData;

    int m_traceVersion;
    std::atomic m_handshake { 0 };

    // Static data member: one instance shared by all Worker objects. This is a declaration
    // only; the definition has to be provided outside the class.
    static LoadProgress s_loadProgress;
    int64_t m_loadTime;

    Failure m_failure = Failure::None;
    FailureData m_failureData = {};

    PlotData* m_sysTimePlot = nullptr;

    // Two queues and two size_t values declared pairwise.
    Vector m_serverQueryQueue, m_serverQueryQueuePrio;
    size_t m_serverQuerySpaceLeft, m_serverQuerySpaceBase;

    unordered_flat_map m_frameImageStaging;
    char* m_frameImageBuffer = nullptr;
    size_t m_frameImageBufferSize = 0;
    TextureCompression m_texcomp;

    uint64_t m_threadCtx = 0;
    ThreadData* m_threadCtxData = nullptr;
    int64_t m_refTimeThread = 0;
    int64_t m_refTimeSerial = 0;
    int64_t m_refTimeCtx = 0;
    int64_t m_refTimeGpu = 0;

    std::atomic m_bytes { 0 };
    std::atomic m_decBytes { 0 };

    // Plain pair of ints: an offset and a size.
    // NOTE(review): presumably the element type of m_netRead, describing a region of
    // m_buffer -- confirm against the network code.
    struct NetBuffer
    {
        int bufferOffset;
        int size;
    };

    // Each of the two groups below bundles state with its own mutex and condition variable.
    // NOTE(review): which fields each mutex guards is not visible here -- confirm in the implementation.
    std::vector m_netRead;
    std::mutex m_netReadLock;
    std::condition_variable m_netReadCv;

    int m_netWriteCnt = 0;
    std::mutex m_netWriteLock;
    std::condition_variable m_netWriteCv;

#ifdef TRACY_NO_STATISTICS
    // Present only when TRACY_NO_STATISTICS is defined.
    Vector m_zoneEventPool;
#endif  // closes the TRACY_NO_STATISTICS conditional around m_zoneEventPool

    // NOTE(review): the template argument lists of `Vector` and `unordered_flat_map` appear
    // to be missing in this copy of the header; the declarations are kept exactly as
    // found -- restore the arguments from the canonical header.

    Vector m_params;

    // Scratch buffer pointer and its size; both start out empty (nullptr / 0).
    char* m_tmpBuf = nullptr;
    size_t m_tmpBufSize = 0;

    unordered_flat_map m_nextCallstack;
    unordered_flat_map m_sourceCodeQuery;
    uint32_t m_nextSourceCodeQuery = 0;

    unordered_flat_map m_powerData;
};  // class Worker

}  // namespace tracy

#endif  // __TRACYWORKER_HPP__