#ifndef __TRACYWORKER_HPP__ #define __TRACYWORKER_HPP__ #include #include #include #include #include #include #include #include "../common/tracy_lz4.hpp" #include "../common/TracyForceInline.hpp" #include "../common/TracyMutex.hpp" #include "../common/TracyQueue.hpp" #include "../common/TracySocket.hpp" #include "tracy_flat_hash_map.hpp" #include "TracyEvent.hpp" #include "TracySlab.hpp" #include "TracyStringDiscovery.hpp" #include "TracyVarArray.hpp" namespace tracy { class FileRead; class FileWrite; namespace EventType { enum Type : uint32_t { Locks = 1 << 0, Messages = 1 << 1, Plots = 1 << 2, Memory = 1 << 3, None = 0, All = std::numeric_limits::max() }; } struct UnsupportedVersion : public std::exception { UnsupportedVersion( int version ) : version( version ) {} int version; }; struct LoadProgress { enum Stage { Initialization, Locks, Messages, Zones, GpuZones, Plots, Memory, CallStacks }; LoadProgress() : total( 0 ), progress( 0 ), subTotal( 0 ), subProgress( 0 ) {} std::atomic total; std::atomic progress; std::atomic subTotal; std::atomic subProgress; }; class Worker { public: #pragma pack( 1 ) struct ZoneThreadData { ZoneEvent* zone; uint16_t thread; }; #pragma pack() private: struct SourceLocationZones { SourceLocationZones() : min( std::numeric_limits::max() ) , max( std::numeric_limits::min() ) , total( 0 ) , selfTotal( 0 ) {} Vector zones; int64_t min; int64_t max; int64_t total; int64_t selfTotal; }; struct DataBlock { DataBlock() : zonesCnt( 0 ), lastTime( 0 ), frameOffset( 0 ), threadLast( std::numeric_limits::max(), 0 ) {} TracyMutex lock; StringDiscovery frames; FrameData* framesBase; Vector gpuData; Vector messages; StringDiscovery plots; Vector threads; MemData memory; uint64_t zonesCnt; int64_t lastTime; uint64_t frameOffset; flat_hash_map> strings; Vector stringData; flat_hash_map stringMap; flat_hash_map> threadNames; flat_hash_map> sourceLocation; Vector sourceLocationPayload; flat_hash_map sourceLocationPayloadMap; Vector sourceLocationExpand; #ifndef TRACY_NO_STATISTICS flat_hash_map> sourceLocationZones; bool sourceLocationZonesReady; #else flat_hash_map sourceLocationZonesCnt; #endif flat_hash_map*, uint32_t, VarArrayHasherPOT, VarArrayComparator> callstackMap; Vector*> callstackPayload; flat_hash_map callstackFrameMap; std::map lockMap; flat_hash_map> threadMap; Vector threadExpand; std::pair threadLast; std::vector> m_zoneChildren; std::vector> m_gpuChildren; CrashEvent m_crashEvent; }; struct MbpsBlock { MbpsBlock() : mbps( 64 ), compRatio( 1.0 ) {} TracyMutex lock; std::vector mbps; float compRatio; }; enum class NextCallstackType { Zone, Gpu, Crash }; struct NextCallstack { NextCallstackType type; union { ZoneEvent* zone; GpuEvent* gpu; }; }; struct FailureData { uint64_t thread; int32_t srcloc; }; public: enum class Failure { None, ZoneStack, ZoneEnd, MemFree, NUM_FAILURES }; Worker( const char* addr ); Worker( FileRead& f, EventType::Type eventMask = EventType::All ); ~Worker(); const std::string& GetAddr() const { return m_addr; } const std::string& GetCaptureName() const { return m_captureName; } const std::string& GetCaptureProgram() const { return m_captureProgram; } uint64_t GetCaptureTime() const { return m_captureTime; } const std::string& GetHostInfo() const { return m_hostInfo; } int64_t GetDelay() const { return m_delay; } int64_t GetResolution() const { return m_resolution; } TracyMutex& GetDataLock() { return m_data.lock; } size_t GetFrameCount( const FrameData& fd ) const { return fd.frames.size(); } size_t GetFullFrameCount( const FrameData& fd ) const; int64_t GetTimeBegin() const { return GetFrameBegin( *m_data.framesBase, 0 ); } int64_t GetLastTime() const { return m_data.lastTime; } uint64_t GetZoneCount() const { return m_data.zonesCnt; } uint64_t GetLockCount() const; uint64_t GetPlotCount() const; uint64_t GetSrcLocCount() const { return m_data.sourceLocationPayload.size() + m_data.sourceLocation.size(); } uint64_t GetCallstackPayloadCount() const { return m_data.callstackPayload.size() - 1; } uint64_t GetCallstackFrameCount() const { return m_data.callstackFrameMap.size(); } uint64_t GetFrameOffset() const { return m_data.frameOffset; } const FrameData* GetFramesBase() const { return m_data.framesBase; } const Vector& GetFrames() const { return m_data.frames.Data(); } int64_t GetFrameTime( const FrameData& fd, size_t idx ) const; int64_t GetFrameBegin( const FrameData& fd, size_t idx ) const; int64_t GetFrameEnd( const FrameData& fd, size_t idx ) const; std::pair GetFrameRange( const FrameData& fd, int64_t from, int64_t to ); const std::map& GetLockMap() const { return m_data.lockMap; } const Vector& GetMessages() const { return m_data.messages; } const Vector& GetGpuData() const { return m_data.gpuData; } const Vector& GetPlots() const { return m_data.plots.Data(); } const Vector& GetThreadData() const { return m_data.threads; } const MemData& GetMemData() const { return m_data.memory; } const VarArray& GetCallstack( uint32_t idx ) const { return *m_data.callstackPayload[idx]; } const CallstackFrame* GetCallstackFrame( uint64_t ptr ) const; const CrashEvent& GetCrashEvent() const { return m_data.m_crashEvent; } // Some zones may have incomplete timing data (only start time is available, end hasn't arrived yet). // GetZoneEnd() will try to infer the end time by looking at child zones (parent zone can't end // before its children have ended). // GetZoneEndDirect() will only return zone's direct timing data, without looking at children. int64_t GetZoneEnd( const ZoneEvent& ev ); int64_t GetZoneEnd( const GpuEvent& ev ); static tracy_force_inline int64_t GetZoneEndDirect( const ZoneEvent& ev ) { return ev.end >= 0 ? ev.end : ev.start; } static tracy_force_inline int64_t GetZoneEndDirect( const GpuEvent& ev ) { return ev.gpuEnd >= 0 ? ev.gpuEnd : ev.gpuStart; } const char* GetString( uint64_t ptr ) const; const char* GetString( const StringRef& ref ) const; const char* GetString( const StringIdx& idx ) const; const char* GetThreadString( uint64_t id ) const; const SourceLocation& GetSourceLocation( int32_t srcloc ) const; const char* GetZoneName( const ZoneEvent& ev ) const; const char* GetZoneName( const ZoneEvent& ev, const SourceLocation& srcloc ) const; const char* GetZoneName( const GpuEvent& ev ) const; const char* GetZoneName( const GpuEvent& ev, const SourceLocation& srcloc ) const; tracy_force_inline const Vector& GetZoneChildren( int32_t idx ) const { return m_data.m_zoneChildren[idx]; } tracy_force_inline const Vector& GetGpuChildren( int32_t idx ) const { return m_data.m_gpuChildren[idx]; } std::vector GetMatchingSourceLocation( const char* query, bool ignoreCase ) const; #ifndef TRACY_NO_STATISTICS const SourceLocationZones& GetZonesForSourceLocation( int32_t srcloc ) const; const flat_hash_map>& GetSourceLocationZones() const { return m_data.sourceLocationZones; } bool AreSourceLocationZonesReady() const { return m_data.sourceLocationZonesReady; } #endif tracy_force_inline uint16_t CompressThread( uint64_t thread ) { if( m_data.threadLast.first == thread ) return m_data.threadLast.second; return CompressThreadReal( thread ); } tracy_force_inline uint64_t DecompressThread( uint16_t thread ) const { assert( thread < m_data.threadExpand.size() ); return m_data.threadExpand[thread]; } TracyMutex& GetMbpsDataLock() { return m_mbpsData.lock; } const std::vector& GetMbpsData() const { return m_mbpsData.mbps; } float GetCompRatio() const { return m_mbpsData.compRatio; } bool HasData() const { return m_hasData.load( std::memory_order_acquire ); } bool IsConnected() const { return m_connected.load( std::memory_order_relaxed ); } bool IsDataStatic() const { return !m_thread.joinable(); } void Shutdown() { m_shutdown.store( true, std::memory_order_relaxed ); } void Write( FileWrite& f ); int GetTraceVersion() const { return m_traceVersion; } uint8_t GetHandshakeStatus() const { return m_handshake.load( std::memory_order_relaxed ); } static const LoadProgress& GetLoadProgress() { return s_loadProgress; } int64_t GetLoadTime() const { return m_loadTime; } void ClearFailure() { m_failure = Failure::None; } Failure GetFailureType() const { return m_failure; } const FailureData& GetFailureData() const { return m_failureData; } static const char* GetFailureString( Failure failure ); private: void Exec(); void ServerQuery( uint8_t type, uint64_t data ); tracy_force_inline bool DispatchProcess( const QueueItem& ev, char*& ptr ); tracy_force_inline bool Process( const QueueItem& ev ); tracy_force_inline void ProcessZoneBegin( const QueueZoneBegin& ev ); tracy_force_inline void ProcessZoneBeginCallstack( const QueueZoneBegin& ev ); tracy_force_inline void ProcessZoneBeginAllocSrcLoc( const QueueZoneBegin& ev ); tracy_force_inline void ProcessZoneEnd( const QueueZoneEnd& ev ); tracy_force_inline void ProcessZoneValidation( const QueueZoneValidation& ev ); tracy_force_inline void ProcessFrameMark( const QueueFrameMark& ev ); tracy_force_inline void ProcessFrameMarkStart( const QueueFrameMark& ev ); tracy_force_inline void ProcessFrameMarkEnd( const QueueFrameMark& ev ); tracy_force_inline void ProcessZoneText( const QueueZoneText& ev ); tracy_force_inline void ProcessZoneName( const QueueZoneText& ev ); tracy_force_inline void ProcessLockAnnounce( const QueueLockAnnounce& ev ); tracy_force_inline void ProcessLockTerminate( const QueueLockTerminate& ev ); tracy_force_inline void ProcessLockWait( const QueueLockWait& ev ); tracy_force_inline void ProcessLockObtain( const QueueLockObtain& ev ); tracy_force_inline void ProcessLockRelease( const QueueLockRelease& ev ); tracy_force_inline void ProcessLockSharedWait( const QueueLockWait& ev ); tracy_force_inline void ProcessLockSharedObtain( const QueueLockObtain& ev ); tracy_force_inline void ProcessLockSharedRelease( const QueueLockRelease& ev ); tracy_force_inline void ProcessLockMark( const QueueLockMark& ev ); tracy_force_inline void ProcessPlotData( const QueuePlotData& ev ); tracy_force_inline void ProcessMessage( const QueueMessage& ev ); tracy_force_inline void ProcessMessageLiteral( const QueueMessage& ev ); tracy_force_inline void ProcessGpuNewContext( const QueueGpuNewContext& ev ); tracy_force_inline void ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev ); tracy_force_inline void ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev ); tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev ); tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev ); tracy_force_inline void ProcessMemAlloc( const QueueMemAlloc& ev ); tracy_force_inline bool ProcessMemFree( const QueueMemFree& ev ); tracy_force_inline void ProcessMemAllocCallstack( const QueueMemAlloc& ev ); tracy_force_inline void ProcessMemFreeCallstack( const QueueMemFree& ev ); tracy_force_inline void ProcessCallstackMemory( const QueueCallstackMemory& ev ); tracy_force_inline void ProcessCallstack( const QueueCallstack& ev ); tracy_force_inline void ProcessCallstackFrame( const QueueCallstackFrame& ev ); tracy_force_inline void ProcessCrashReport( const QueueCrashReport& ev ); tracy_force_inline void ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev ); tracy_force_inline void ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& ev ); void ZoneStackFailure( uint64_t thread, const ZoneEvent* ev ); void ZoneEndFailure( uint64_t thread ); void MemFreeFailure( uint64_t thread ); tracy_force_inline void CheckSourceLocation( uint64_t ptr ); void NewSourceLocation( uint64_t ptr ); tracy_force_inline uint32_t ShrinkSourceLocation( uint64_t srcloc ); uint32_t NewShrinkedSourceLocation( uint64_t srcloc ); tracy_force_inline void MemAllocChanged( int64_t time ); void CreateMemAllocPlot(); void ReconstructMemAllocPlot(); void InsertMessageData( MessageData* msg, uint64_t thread ); ThreadData* NewThread( uint64_t thread ); ThreadData* NoticeThread( uint64_t thread ); tracy_force_inline void NewZone( ZoneEvent* zone, uint64_t thread ); void InsertLockEvent( LockMap& lockmap, LockEvent* lev, uint64_t thread ); void CheckString( uint64_t ptr ); void CheckThreadString( uint64_t id ); void AddSourceLocation( const QueueSourceLocation& srcloc ); void AddSourceLocationPayload( uint64_t ptr, char* data, size_t sz ); void AddString( uint64_t ptr, char* str, size_t sz ); void AddThreadString( uint64_t id, char* str, size_t sz ); void AddCustomString( uint64_t ptr, char* str, size_t sz ); tracy_force_inline void AddCallstackPayload( uint64_t ptr, char* data, size_t sz ); void InsertPlot( PlotData* plot, int64_t time, double val ); void HandlePlotName( uint64_t name, char* str, size_t sz ); void HandleFrameName( uint64_t name, char* str, size_t sz ); void HandlePostponedPlots(); StringLocation StoreString( char* str, size_t sz ); uint16_t CompressThreadReal( uint64_t thread ); uint16_t CompressThreadNew( uint64_t thread ); tracy_force_inline void ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime ); tracy_force_inline void ReadTimelinePre042( FileRead& f, ZoneEvent* zone, uint16_t thread, int fileVer ); tracy_force_inline void ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime ); tracy_force_inline void ReadTimelinePre042( FileRead& f, GpuEvent* zone, int fileVer ); tracy_force_inline void ReadTimelineUpdateStatistics( ZoneEvent* zone, uint16_t thread ); void ReadTimeline( FileRead& f, Vector& vec, uint16_t thread, uint64_t size, int64_t& refTime ); void ReadTimelinePre042( FileRead& f, Vector& vec, uint16_t thread, uint64_t size, int fileVer ); void ReadTimeline( FileRead& f, Vector& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime ); void ReadTimelinePre042( FileRead& f, Vector& vec, uint64_t size, int fileVer ); void WriteTimeline( FileWrite& f, const Vector& vec, int64_t& refTime ); void WriteTimeline( FileWrite& f, const Vector& vec, int64_t& refTime, int64_t& refGpuTime ); int64_t TscTime( int64_t tsc ) { return int64_t( tsc * m_timerMul ); } int64_t TscTime( uint64_t tsc ) { return int64_t( tsc * m_timerMul ); } Socket m_sock; std::string m_addr; std::thread m_thread; std::atomic m_connected = { false }; std::atomic m_hasData; std::atomic m_shutdown = { false }; std::thread m_threadMemory, m_threadZones; int64_t m_delay; int64_t m_resolution; double m_timerMul; std::string m_captureName; std::string m_captureProgram; uint64_t m_captureTime; std::string m_hostInfo; bool m_terminate = false; bool m_crashed = false; LZ4_streamDecode_t* m_stream; char* m_buffer; int m_bufferOffset; bool m_onDemand; GpuCtxData* m_gpuCtxMap[256]; flat_hash_map> m_pendingCustomStrings; flat_hash_map m_pendingCallstacks; flat_hash_map> m_pendingSourceLocationPayload; Vector m_sourceLocationQueue; flat_hash_map> m_sourceLocationShrink; flat_hash_map> m_threadMap; flat_hash_map> m_nextCallstack; uint32_t m_pendingStrings; uint32_t m_pendingThreads; uint32_t m_pendingSourceLocation; uint32_t m_pendingCallstackFrames; uint64_t m_lastMemActionCallstack; bool m_lastMemActionWasAlloc; Slab<64*1024*1024> m_slab; DataBlock m_data; MbpsBlock m_mbpsData; int m_traceVersion; std::atomic m_handshake = { 0 }; static LoadProgress s_loadProgress; int64_t m_loadTime; Failure m_failure = Failure::None; FailureData m_failureData; }; } #endif