From 3b03e849f021070ed8c0796f6bbb22d4a1d56f86 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Sat, 31 Mar 2018 14:03:55 +0200 Subject: [PATCH] Harden client code against unaligned memory access. There shouldn't be any changes in generated code on modern architectures, as the memcpy will be reduced to a store/load operation identical to the one generated with plain struct member access. GetTime( cpu ) needs special handling, as the MSVC intrinsic for rdtscp can't store the CPU identifier in a register. Using an intermediate variable would cause a store to the stack, a read from the stack, and then a store to the destination address. Since rdtscp is only available on x86, which handles unaligned stores without any problems, we can have one place with direct struct member access. --- TracyLua.hpp | 55 ++++++++++------ TracyOpenGL.hpp | 41 ++++++------ client/TracyLock.hpp | 135 ++++++++++++++++++++------------------- client/TracyProfiler.cpp | 114 ++++++++++++++++++++------------- client/TracyProfiler.hpp | 51 +++++++-------- client/TracyScoped.hpp | 33 +++++++--- common/TracyProtocol.hpp | 4 +- 7 files changed, 247 insertions(+), 186 deletions(-) diff --git a/TracyLua.hpp b/TracyLua.hpp index d1f3b4a0..d6ce4ecb 100644 --- a/TracyLua.hpp +++ b/TracyLua.hpp @@ -101,6 +101,7 @@ static inline void LuaRemove( char* script ) #else #include "common/TracyColor.hpp" +#include "common/TracyAlign.hpp" #include "common/TracySystem.hpp" #include "client/TracyProfiler.hpp" @@ -143,10 +144,16 @@ static inline int LuaZoneBegin( lua_State* L ) auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::ZoneBeginAllocSrcLoc; - item->zoneBegin.time = Profiler::GetTime( item->zoneBegin.cpu ); - item->zoneBegin.thread = GetThreadHandle(); - item->zoneBegin.srcloc = (uint64_t)ptr; + MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLoc ); +#ifdef TRACY_RDTSCP_SUPPORTED + MemWrite( &item->zoneBegin.time, Profiler::GetTime( 
item->zoneBegin.cpu ) ); +#else + uint32_t cpu; + MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) ); + MemWrite( &item->zoneBegin.cpu, cpu ); +#endif + MemWrite( &item->zoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr ); tail.store( magic + 1, std::memory_order_release ); return 0; } @@ -188,10 +195,16 @@ static inline int LuaZoneBeginN( lua_State* L ) auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::ZoneBeginAllocSrcLoc; - item->zoneBegin.time = Profiler::GetTime( item->zoneBegin.cpu ); - item->zoneBegin.thread = GetThreadHandle(); - item->zoneBegin.srcloc = (uint64_t)ptr; + MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLoc ); +#ifdef TRACY_RDTSCP_SUPPORTED + MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) ); +#else + uint32_t cpu; + MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) ); + MemWrite( &item->zoneBegin.cpu, cpu ); +#endif + MemWrite( &item->zoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr ); tail.store( magic + 1, std::memory_order_release ); return 0; } @@ -202,9 +215,15 @@ static inline int LuaZoneEnd( lua_State* L ) auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::ZoneEnd; - item->zoneEnd.time = Profiler::GetTime( item->zoneEnd.cpu ); - item->zoneEnd.thread = GetThreadHandle(); + MemWrite( &item->hdr.type, QueueType::ZoneEnd ); +#ifdef TRACY_RDTSCP_SUPPORTED + MemWrite( &item->zoneEnd.time, Profiler::GetTime( item->zoneEnd.cpu ) ); +#else + uint32_t cpu; + MemWrite( &item->zoneEnd.time, Profiler::GetTime( cpu ) ); + MemWrite( &item->zoneEnd.cpu, cpu ); +#endif + MemWrite( &item->zoneEnd.thread, GetThreadHandle() ); tail.store( magic + 1, std::memory_order_release ); return 0; } @@ -221,9 +240,9 @@ static inline int LuaZoneText( lua_State* L 
) ptr[size] = '\0'; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::ZoneText; - item->zoneText.thread = GetThreadHandle(); - item->zoneText.text = (uint64_t)ptr; + MemWrite( &item->hdr.type, QueueType::ZoneText ); + MemWrite( &item->zoneText.thread, GetThreadHandle() ); + MemWrite( &item->zoneText.text, (uint64_t)ptr ); tail.store( magic + 1, std::memory_order_release ); return 0; } @@ -240,10 +259,10 @@ static inline int LuaMessage( lua_State* L ) ptr[size] = '\0'; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::Message; - item->message.time = Profiler::GetTime(); - item->message.thread = GetThreadHandle(); - item->message.text = (uint64_t)ptr; + MemWrite( &item->hdr.type, QueueType::Message ); + MemWrite( &item->message.time, Profiler::GetTime() ); + MemWrite( &item->message.thread, GetThreadHandle() ); + MemWrite( &item->message.text, (uint64_t)ptr ); tail.store( magic + 1, std::memory_order_release ); return 0; } diff --git a/TracyOpenGL.hpp b/TracyOpenGL.hpp index 155cca0b..e9f0169f 100644 --- a/TracyOpenGL.hpp +++ b/TracyOpenGL.hpp @@ -16,6 +16,7 @@ #include "Tracy.hpp" #include "client/TracyProfiler.hpp" +#include "common/TracyAlign.hpp" #include "common/TracyAlloc.hpp" #define TracyGpuContext tracy::s_gpuCtx.ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::s_gpuCtx.ptr) tracy::GpuCtx; @@ -53,12 +54,12 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::GpuNewContext; - item->gpuNewContext.cpuTime = tcpu; - item->gpuNewContext.gpuTime = tgpu; - item->gpuNewContext.thread = GetThreadHandle(); - item->gpuNewContext.context = m_context; - item->gpuNewContext.accuracyBits = bits; + MemWrite( &item->hdr.type, QueueType::GpuNewContext ); + MemWrite( &item->gpuNewContext.cpuTime, tcpu ); + MemWrite( 
&item->gpuNewContext.gpuTime, tgpu ); + MemWrite( &item->gpuNewContext.thread, GetThreadHandle() ); + MemWrite( &item->gpuNewContext.context, m_context ); + MemWrite( &item->gpuNewContext.accuracyBits, (uint8_t)bits ); tail.store( magic + 1, std::memory_order_release ); } @@ -97,9 +98,9 @@ public: glGetQueryObjectui64v( m_query[m_tail], GL_QUERY_RESULT, &time ); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::GpuTime; - item->gpuTime.gpuTime = (int64_t)time; - item->gpuTime.context = m_context; + MemWrite( &item->hdr.type, QueueType::GpuTime ); + MemWrite( &item->gpuTime.gpuTime, (int64_t)time ); + MemWrite( &item->gpuTime.context, m_context ); tail.store( magic + 1, std::memory_order_release ); m_tail = ( m_tail + 1 ) % QueryCount; } @@ -110,10 +111,10 @@ public: int64_t tcpu = Profiler::GetTime(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::GpuResync; - item->gpuResync.cpuTime = tcpu; - item->gpuResync.gpuTime = tgpu; - item->gpuResync.context = m_context; + MemWrite( &item->hdr.type, QueueType::GpuResync ); + MemWrite( &item->gpuResync.cpuTime, tcpu ); + MemWrite( &item->gpuResync.gpuTime, tgpu ); + MemWrite( &item->gpuResync.context, m_context ); tail.store( magic + 1, std::memory_order_release ); } } @@ -152,10 +153,10 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::GpuZoneBegin; - item->gpuZoneBegin.cpuTime = Profiler::GetTime(); - item->gpuZoneBegin.srcloc = (uint64_t)srcloc; - item->gpuZoneBegin.context = s_gpuCtx.ptr->GetId(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBegin ); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); + MemWrite( &item->gpuZoneBegin.context, s_gpuCtx.ptr->GetId() ); tail.store( magic + 1, std::memory_order_release ); } @@ -167,9 +168,9 @@ public: auto& token = s_token.ptr; auto& tail = 
token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::GpuZoneEnd; - item->gpuZoneEnd.cpuTime = Profiler::GetTime(); - item->gpuZoneEnd.context = s_gpuCtx.ptr->GetId(); + MemWrite( &item->hdr.type, QueueType::GpuZoneEnd ); + MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneEnd.context, s_gpuCtx.ptr->GetId() ); tail.store( magic + 1, std::memory_order_release ); } }; diff --git a/client/TracyLock.hpp b/client/TracyLock.hpp index d84b041f..4ce423ef 100644 --- a/client/TracyLock.hpp +++ b/client/TracyLock.hpp @@ -5,6 +5,7 @@ #include #include "../common/TracySystem.hpp" +#include "../common/TracyAlign.hpp" #include "TracyProfiler.hpp" namespace tracy @@ -25,10 +26,10 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::LockAnnounce; - item->lockAnnounce.id = m_id; - item->lockAnnounce.lckloc = (uint64_t)srcloc; - item->lockAnnounce.type = LockType::Lockable; + MemWrite( &item->hdr.type, QueueType::LockAnnounce ); + MemWrite( &item->lockAnnounce.id, m_id ); + MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc ); + MemWrite( &item->lockAnnounce.type, LockType::Lockable ); tail.store( magic + 1, std::memory_order_release ); } @@ -43,11 +44,11 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::LockWait; - item->lockWait.id = m_id; - item->lockWait.thread = thread; - item->lockWait.time = Profiler::GetTime(); - item->lockWait.type = LockType::Lockable; + MemWrite( &item->hdr.type, QueueType::LockWait ); + MemWrite( &item->lockWait.id, m_id ); + MemWrite( &item->lockWait.thread, thread ); + MemWrite( &item->lockWait.time, Profiler::GetTime() ); + MemWrite( &item->lockWait.type, LockType::Lockable ); tail.store( magic + 1, std::memory_order_release ); } @@ -58,10 +59,10 @@ public: auto& token = 
s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::LockObtain; - item->lockObtain.id = m_id; - item->lockObtain.thread = thread; - item->lockObtain.time = Profiler::GetTime(); + MemWrite( &item->hdr.type, QueueType::LockObtain ); + MemWrite( &item->lockObtain.id, m_id ); + MemWrite( &item->lockObtain.thread, thread ); + MemWrite( &item->lockObtain.time, Profiler::GetTime() ); tail.store( magic + 1, std::memory_order_release ); } } @@ -74,10 +75,10 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::LockRelease; - item->lockRelease.id = m_id; - item->lockRelease.thread = GetThreadHandle(); - item->lockRelease.time = Profiler::GetTime(); + MemWrite( &item->hdr.type, QueueType::LockRelease ); + MemWrite( &item->lockRelease.id, m_id ); + MemWrite( &item->lockRelease.thread, GetThreadHandle() ); + MemWrite( &item->lockRelease.time, Profiler::GetTime() ); tail.store( magic + 1, std::memory_order_release ); } @@ -90,10 +91,10 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::LockObtain; - item->lockObtain.id = (uint64_t)&m_lockable; - item->lockObtain.thread = GetThreadHandle(); - item->lockObtain.time = Profiler::GetTime(); + MemWrite( &item->hdr.type, QueueType::LockObtain ); + MemWrite( &item->lockObtain.id, (uint64_t)&m_lockable ); + MemWrite( &item->lockObtain.thread, GetThreadHandle() ); + MemWrite( &item->lockObtain.time, Profiler::GetTime() ); tail.store( magic + 1, std::memory_order_release ); } return ret; @@ -105,10 +106,10 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::LockMark; - item->lockMark.id = m_id; - item->lockMark.thread = GetThreadHandle(); - item->lockMark.srcloc = (uint64_t)srcloc; + 
MemWrite( &item->hdr.type, QueueType::LockMark ); + MemWrite( &item->lockMark.id, m_id ); + MemWrite( &item->lockMark.thread, GetThreadHandle() ); + MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc ); tail.store( magic + 1, std::memory_order_release ); } @@ -131,10 +132,10 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::LockAnnounce; - item->lockAnnounce.id = m_id; - item->lockAnnounce.lckloc = (uint64_t)srcloc; - item->lockAnnounce.type = LockType::SharedLockable; + MemWrite( &item->hdr.type, QueueType::LockAnnounce ); + MemWrite( &item->lockAnnounce.id, m_id ); + MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc ); + MemWrite( &item->lockAnnounce.type, LockType::SharedLockable ); tail.store( magic + 1, std::memory_order_release ); } @@ -149,11 +150,11 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::LockWait; - item->lockWait.id = m_id; - item->lockWait.thread = thread; - item->lockWait.time = Profiler::GetTime(); - item->lockWait.type = LockType::SharedLockable; + MemWrite( &item->hdr.type, QueueType::LockWait ); + MemWrite( &item->lockWait.id, m_id ); + MemWrite( &item->lockWait.thread, thread ); + MemWrite( &item->lockWait.time, Profiler::GetTime() ); + MemWrite( &item->lockWait.type, LockType::SharedLockable ); tail.store( magic + 1, std::memory_order_release ); } @@ -164,10 +165,10 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::LockObtain; - item->lockObtain.id = m_id; - item->lockObtain.thread = thread; - item->lockObtain.time = Profiler::GetTime(); + MemWrite( &item->hdr.type, QueueType::LockObtain ); + MemWrite( &item->lockObtain.id, m_id ); + MemWrite( &item->lockObtain.thread, thread ); + MemWrite( &item->lockObtain.time, Profiler::GetTime() ); 
tail.store( magic + 1, std::memory_order_release ); } } @@ -180,10 +181,10 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::LockRelease; - item->lockRelease.id = m_id; - item->lockRelease.thread = GetThreadHandle(); - item->lockRelease.time = Profiler::GetTime(); + MemWrite( &item->hdr.type, QueueType::LockRelease ); + MemWrite( &item->lockRelease.id, m_id ); + MemWrite( &item->lockRelease.thread, GetThreadHandle() ); + MemWrite( &item->lockRelease.time, Profiler::GetTime() ); tail.store( magic + 1, std::memory_order_release ); } @@ -196,10 +197,10 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::LockObtain; - item->lockObtain.id = (uint64_t)&m_lockable; - item->lockObtain.thread = GetThreadHandle(); - item->lockObtain.time = Profiler::GetTime(); + MemWrite( &item->hdr.type, QueueType::LockObtain ); + MemWrite( &item->lockObtain.id, (uint64_t)&m_lockable ); + MemWrite( &item->lockObtain.thread, GetThreadHandle() ); + MemWrite( &item->lockObtain.time, Profiler::GetTime() ); tail.store( magic + 1, std::memory_order_release ); } return ret; @@ -213,11 +214,11 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::LockSharedWait; - item->lockWait.id = m_id; - item->lockWait.thread = thread; - item->lockWait.time = Profiler::GetTime(); - item->lockWait.type = LockType::SharedLockable; + MemWrite( &item->hdr.type, QueueType::LockSharedWait ); + MemWrite( &item->lockWait.id, m_id ); + MemWrite( &item->lockWait.thread, thread ); + MemWrite( &item->lockWait.time, Profiler::GetTime() ); + MemWrite( &item->lockWait.type, LockType::SharedLockable ); tail.store( magic + 1, std::memory_order_release ); } @@ -228,10 +229,10 @@ public: auto& token = s_token.ptr; auto& tail = 
token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::LockSharedObtain; - item->lockObtain.id = m_id; - item->lockObtain.thread = thread; - item->lockObtain.time = Profiler::GetTime(); + MemWrite( &item->hdr.type, QueueType::LockSharedObtain ); + MemWrite( &item->lockObtain.id, m_id ); + MemWrite( &item->lockObtain.thread, thread ); + MemWrite( &item->lockObtain.time, Profiler::GetTime() ); tail.store( magic + 1, std::memory_order_release ); } } @@ -244,10 +245,10 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::LockSharedRelease; - item->lockRelease.id = m_id; - item->lockRelease.thread = GetThreadHandle(); - item->lockRelease.time = Profiler::GetTime(); + MemWrite( &item->hdr.type, QueueType::LockSharedRelease ); + MemWrite( &item->lockRelease.id, m_id ); + MemWrite( &item->lockRelease.thread, GetThreadHandle() ); + MemWrite( &item->lockRelease.time, Profiler::GetTime() ); tail.store( magic + 1, std::memory_order_release ); } @@ -260,10 +261,10 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::LockSharedObtain; - item->lockObtain.id = (uint64_t)&m_lockable; - item->lockObtain.thread = GetThreadHandle(); - item->lockObtain.time = Profiler::GetTime(); + MemWrite( &item->hdr.type, QueueType::LockSharedObtain ); + MemWrite( &item->lockObtain.id, (uint64_t)&m_lockable ); + MemWrite( &item->lockObtain.thread, GetThreadHandle() ); + MemWrite( &item->lockObtain.time, Profiler::GetTime() ); tail.store( magic + 1, std::memory_order_release ); } return ret; @@ -275,10 +276,10 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::LockMark; - item->lockMark.id = m_id; - item->lockMark.thread = GetThreadHandle(); - item->lockMark.srcloc = 
(uint64_t)srcloc; + MemWrite( &item->hdr.type, QueueType::LockMark ); + MemWrite( &item->lockMark.id, m_id ); + MemWrite( &item->lockMark.thread, GetThreadHandle() ); + MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc ); tail.store( magic + 1, std::memory_order_release ); } diff --git a/client/TracyProfiler.cpp b/client/TracyProfiler.cpp index 9a08cb49..b9b8a561 100644 --- a/client/TracyProfiler.cpp +++ b/client/TracyProfiler.cpp @@ -19,6 +19,7 @@ #include #include +#include "../common/TracyAlign.hpp" #include "../common/TracyProtocol.hpp" #include "../common/TracySocket.hpp" #include "../common/TracySystem.hpp" @@ -180,12 +181,12 @@ void Profiler::Worker() while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); WelcomeMessage welcome; - welcome.timerMul = m_timerMul; - welcome.initBegin = s_initTime.val; - welcome.initEnd = m_timeBegin.load( std::memory_order_relaxed ); - welcome.delay = m_delay; - welcome.resolution = m_resolution; - welcome.epoch = m_epoch; + MemWrite( &welcome.timerMul, m_timerMul ); + MemWrite( &welcome.initBegin, s_initTime.val ); + MemWrite( &welcome.initEnd, m_timeBegin.load( std::memory_order_relaxed ) ); + MemWrite( &welcome.delay, m_delay ); + MemWrite( &welcome.resolution, m_resolution ); + MemWrite( &welcome.epoch, m_epoch ); memcpy( welcome.programName, procname, pnsz ); memset( welcome.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz ); @@ -250,7 +251,7 @@ void Profiler::Worker() } QueueItem terminate; - terminate.hdr.type = QueueType::Terminate; + MemWrite( &terminate.hdr.type, QueueType::Terminate ); if( !SendData( (const char*)&terminate, 1 ) ) return; for(;;) { @@ -288,22 +289,23 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) while( item != end ) { uint64_t ptr; - if( item->hdr.idx < (int)QueueType::Terminate ) + const auto idx = MemRead( &item->hdr.idx ); + if( idx < (int)QueueType::Terminate ) { - switch( 
item->hdr.type ) + switch( (QueueType)idx ) { case QueueType::ZoneText: - ptr = item->zoneText.text; + ptr = MemRead( &item->zoneText.text ); SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); tracy_free( (void*)ptr ); break; case QueueType::Message: - ptr = item->message.text; + ptr = MemRead( &item->message.text ); SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); tracy_free( (void*)ptr ); break; case QueueType::ZoneBeginAllocSrcLoc: - ptr = item->zoneBegin.srcloc; + ptr = MemRead( &item->zoneBegin.srcloc ); SendSourceLocationPayload( ptr ); tracy_free( (void*)ptr ); break; @@ -312,7 +314,7 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) break; } } - if( !AppendData( item, QueueDataSize[item->hdr.idx] ) ) return ConnectionLost; + if( !AppendData( item, QueueDataSize[idx] ) ) return ConnectionLost; item++; } } @@ -362,16 +364,16 @@ bool Profiler::SendString( uint64_t str, const char* ptr, QueueType type ) assert( type == QueueType::StringData || type == QueueType::ThreadName || type == QueueType::CustomStringData || type == QueueType::PlotName ); QueueItem item; - item.hdr.type = type; - item.stringTransfer.ptr = str; + MemWrite( &item.hdr.type, type ); + MemWrite( &item.stringTransfer.ptr, str ); auto len = strlen( ptr ); assert( len <= std::numeric_limits<uint16_t>::max() ); auto l16 = uint16_t( len ); - NeedDataSize( QueueDataSize[item.hdr.idx] + sizeof( l16 ) + l16 ); + NeedDataSize( QueueDataSize[(int)type] + sizeof( l16 ) + l16 ); - AppendData( &item, QueueDataSize[item.hdr.idx] ); + AppendData( &item, QueueDataSize[(int)type] ); AppendData( &l16, sizeof( l16 ) ); AppendData( ptr, l16 ); @@ -382,15 +384,15 @@ void Profiler::SendSourceLocation( uint64_t ptr ) { auto srcloc = (const SourceLocation*)ptr; QueueItem item; - item.hdr.type = QueueType::SourceLocation; - item.srcloc.name = (uint64_t)srcloc->name; - item.srcloc.file = (uint64_t)srcloc->file; - item.srcloc.function = (uint64_t)srcloc->function; - 
item.srcloc.line = srcloc->line; - item.srcloc.r = ( srcloc->color ) & 0xFF; - item.srcloc.g = ( srcloc->color >> 8 ) & 0xFF; - item.srcloc.b = ( srcloc->color >> 16 ) & 0xFF; - AppendData( &item, QueueDataSize[item.hdr.idx] ); + MemWrite( &item.hdr.type, QueueType::SourceLocation ); + MemWrite( &item.srcloc.name, (uint64_t)srcloc->name ); + MemWrite( &item.srcloc.file, (uint64_t)srcloc->file ); + MemWrite( &item.srcloc.function, (uint64_t)srcloc->function ); + MemWrite( &item.srcloc.line, srcloc->line ); + MemWrite( &item.srcloc.r, uint8_t( ( srcloc->color ) & 0xFF ) ); + MemWrite( &item.srcloc.g, uint8_t( ( srcloc->color >> 8 ) & 0xFF ) ); + MemWrite( &item.srcloc.b, uint8_t( ( srcloc->color >> 16 ) & 0xFF ) ); + AppendData( &item, QueueDataSize[(int)QueueType::SourceLocation] ); } bool Profiler::SendSourceLocationPayload( uint64_t _ptr ) @@ -398,17 +400,17 @@ bool Profiler::SendSourceLocationPayload( uint64_t _ptr ) auto ptr = (const char*)_ptr; QueueItem item; - item.hdr.type = QueueType::SourceLocationPayload; - item.stringTransfer.ptr = _ptr; + MemWrite( &item.hdr.type, QueueType::SourceLocationPayload ); + MemWrite( &item.stringTransfer.ptr, _ptr ); const auto len = *((uint32_t*)ptr); assert( len <= std::numeric_limits<uint16_t>::max() ); assert( len > 4 ); const auto l16 = uint16_t( len - 4 ); - NeedDataSize( QueueDataSize[item.hdr.idx] + sizeof( l16 ) + l16 ); + NeedDataSize( QueueDataSize[(int)QueueType::SourceLocationPayload] + sizeof( l16 ) + l16 ); - AppendData( &item, QueueDataSize[item.hdr.idx] ); + AppendData( &item, QueueDataSize[(int)QueueType::SourceLocationPayload] ); AppendData( &l16, sizeof( l16 ) ); AppendData( ptr + 4, l16 ); @@ -508,19 +510,31 @@ void Profiler::CalibrateDelay() Magic magic; auto& tail = ptoken->get_tail_index(); auto item = ptoken->enqueue_begin( magic ); - item->hdr.type = QueueType::ZoneBegin; - item->zoneBegin.thread = GetThreadHandle(); - item->zoneBegin.time = GetTime( item->zoneBegin.cpu ); - item->zoneBegin.srcloc = 
(uint64_t)&__tracy_source_location; + MemWrite( &item->hdr.type, QueueType::ZoneBegin ); + MemWrite( &item->zoneBegin.thread, GetThreadHandle() ); +#ifdef TRACY_RDTSCP_SUPPORTED + MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) ); +#else + uint32_t cpu; + MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) ); + MemWrite( &item->zoneBegin.cpu, cpu ); +#endif + MemWrite( &item->zoneBegin.srcloc, (uint64_t)&__tracy_source_location ); tail.store( magic + 1, std::memory_order_release ); } { Magic magic; auto& tail = ptoken->get_tail_index(); auto item = ptoken->enqueue_begin( magic ); - item->hdr.type = QueueType::ZoneEnd; - item->zoneEnd.thread = 0; - item->zoneEnd.time = GetTime( item->zoneEnd.cpu ); + MemWrite( &item->hdr.type, QueueType::ZoneEnd ); + MemWrite( &item->zoneEnd.thread, uint64_t( 0 ) ); +#ifdef TRACY_RDTSCP_SUPPORTED + MemWrite( &item->zoneEnd.time, GetTime( item->zoneEnd.cpu ) ); +#else + uint32_t cpu; + MemWrite( &item->zoneEnd.time, GetTime( cpu ) ); + MemWrite( &item->zoneEnd.cpu, cpu ); +#endif tail.store( magic + 1, std::memory_order_release ); } } @@ -538,19 +552,31 @@ void Profiler::CalibrateDelay() Magic magic; auto& tail = ptoken->get_tail_index(); auto item = ptoken->enqueue_begin( magic ); - item->hdr.type = QueueType::ZoneBegin; - item->zoneBegin.thread = GetThreadHandle(); - item->zoneBegin.time = GetTime( item->zoneBegin.cpu ); - item->zoneBegin.srcloc = (uint64_t)&__tracy_source_location; + MemWrite( &item->hdr.type, QueueType::ZoneBegin ); + MemWrite( &item->zoneBegin.thread, GetThreadHandle() ); +#ifdef TRACY_RDTSCP_SUPPORTED + MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) ); +#else + uint32_t cpu; + MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) ); + MemWrite( &item->zoneBegin.cpu, cpu ); +#endif + MemWrite( &item->zoneBegin.srcloc, (uint64_t)&__tracy_source_location ); tail.store( magic + 1, std::memory_order_release ); } { Magic magic; auto& tail = 
ptoken->get_tail_index(); auto item = ptoken->enqueue_begin( magic ); - item->hdr.type = QueueType::ZoneEnd; - item->zoneEnd.thread = 0; - item->zoneEnd.time = GetTime( item->zoneEnd.cpu ); + MemWrite( &item->hdr.type, QueueType::ZoneEnd ); + MemWrite( &item->zoneEnd.thread, uint64_t( 0 ) ); +#ifdef TRACY_RDTSCP_SUPPORTED + MemWrite( &item->zoneEnd.time, GetTime( item->zoneEnd.cpu ) ); +#else + uint32_t cpu; + MemWrite( &item->zoneEnd.time, GetTime( cpu ) ); + MemWrite( &item->zoneEnd.cpu, cpu ); +#endif tail.store( magic + 1, std::memory_order_release ); } } diff --git a/client/TracyProfiler.hpp b/client/TracyProfiler.hpp index 238f6f0e..1666d0c9 100644 --- a/client/TracyProfiler.hpp +++ b/client/TracyProfiler.hpp @@ -9,6 +9,7 @@ #include "concurrentqueue.h" #include "../common/tracy_lz4.hpp" #include "../common/TracyQueue.hpp" +#include "../common/TracyAlign.hpp" #include "../common/TracyAlloc.hpp" #include "../common/TracySystem.hpp" @@ -109,8 +110,8 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::FrameMarkMsg; - item->frameMark.time = GetTime(); + MemWrite( &item->hdr.type, QueueType::FrameMarkMsg ); + MemWrite( &item->frameMark.time, GetTime() ); tail.store( magic + 1, std::memory_order_release ); } @@ -120,11 +121,11 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::PlotData; - item->plotData.name = (uint64_t)name; - item->plotData.time = GetTime(); - item->plotData.type = PlotDataType::Int; - item->plotData.data.i = val; + MemWrite( &item->hdr.type, QueueType::PlotData ); + MemWrite( &item->plotData.name, (uint64_t)name ); + MemWrite( &item->plotData.time, GetTime() ); + MemWrite( &item->plotData.type, PlotDataType::Int ); + MemWrite( &item->plotData.data.i, val ); tail.store( magic + 1, std::memory_order_release ); } @@ -134,11 +135,11 @@ public: auto& token = 
s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::PlotData; - item->plotData.name = (uint64_t)name; - item->plotData.time = GetTime(); - item->plotData.type = PlotDataType::Float; - item->plotData.data.f = val; + MemWrite( &item->hdr.type, QueueType::PlotData ); + MemWrite( &item->plotData.name, (uint64_t)name ); + MemWrite( &item->plotData.time, GetTime() ); + MemWrite( &item->plotData.type, PlotDataType::Float ); + MemWrite( &item->plotData.data.f, val ); tail.store( magic + 1, std::memory_order_release ); } @@ -148,11 +149,11 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::PlotData; - item->plotData.name = (uint64_t)name; - item->plotData.time = GetTime(); - item->plotData.type = PlotDataType::Double; - item->plotData.data.d = val; + MemWrite( &item->hdr.type, QueueType::PlotData ); + MemWrite( &item->plotData.name, (uint64_t)name ); + MemWrite( &item->plotData.time, GetTime() ); + MemWrite( &item->plotData.type, PlotDataType::Double ); + MemWrite( &item->plotData.data.d, val ); tail.store( magic + 1, std::memory_order_release ); } @@ -165,10 +166,10 @@ public: ptr[size] = '\0'; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::Message; - item->message.time = GetTime(); - item->message.thread = GetThreadHandle(); - item->message.text = (uint64_t)ptr; + MemWrite( &item->hdr.type, QueueType::Message ); + MemWrite( &item->message.time, GetTime() ); + MemWrite( &item->message.thread, GetThreadHandle() ); + MemWrite( &item->message.text, (uint64_t)ptr ); tail.store( magic + 1, std::memory_order_release ); } @@ -178,10 +179,10 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::MessageLiteral; - item->message.time = GetTime(); - 
item->message.thread = GetThreadHandle(); - item->message.text = (uint64_t)txt; + MemWrite( &item->hdr.type, QueueType::MessageLiteral ); + MemWrite( &item->message.time, GetTime() ); + MemWrite( &item->message.thread, GetThreadHandle() ); + MemWrite( &item->message.text, (uint64_t)txt ); tail.store( magic + 1, std::memory_order_release ); } diff --git a/client/TracyScoped.hpp b/client/TracyScoped.hpp index d929869b..189a17dc 100644 --- a/client/TracyScoped.hpp +++ b/client/TracyScoped.hpp @@ -5,6 +5,7 @@ #include #include "../common/TracySystem.hpp" +#include "../common/TracyAlign.hpp" #include "../common/TracyAlloc.hpp" #include "TracyProfiler.hpp" @@ -22,10 +23,16 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::ZoneBegin; - item->zoneBegin.time = Profiler::GetTime( item->zoneBegin.cpu ); - item->zoneBegin.thread = thread; - item->zoneBegin.srcloc = (uint64_t)srcloc; + MemWrite( &item->hdr.type, QueueType::ZoneBegin ); +#ifdef TRACY_RDTSCP_SUPPORTED + MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) ); +#else + uint32_t cpu; + MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) ); + MemWrite( &item->zoneBegin.cpu, cpu ); +#endif + MemWrite( &item->zoneBegin.thread, thread ); + MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); tail.store( magic + 1, std::memory_order_release ); } @@ -35,9 +42,15 @@ public: auto& token = s_token.ptr; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::ZoneEnd; - item->zoneEnd.time = Profiler::GetTime( item->zoneEnd.cpu ); - item->zoneEnd.thread = m_thread; + MemWrite( &item->hdr.type, QueueType::ZoneEnd ); +#ifdef TRACY_RDTSCP_SUPPORTED + MemWrite( &item->zoneEnd.time, Profiler::GetTime( item->zoneEnd.cpu ) ); +#else + uint32_t cpu; + MemWrite( &item->zoneEnd.time, Profiler::GetTime( cpu ) ); + MemWrite( &item->zoneEnd.cpu, cpu ); 
+#endif + MemWrite( &item->zoneEnd.thread, m_thread ); tail.store( magic + 1, std::memory_order_release ); } @@ -50,9 +63,9 @@ public: ptr[size] = '\0'; auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); - item->hdr.type = QueueType::ZoneText; - item->zoneText.thread = m_thread; - item->zoneText.text = (uint64_t)ptr; + MemWrite( &item->hdr.type, QueueType::ZoneText ); + MemWrite( &item->zoneText.thread, m_thread ); + MemWrite( &item->zoneText.text, (uint64_t)ptr ); tail.store( magic + 1, std::memory_order_release ); } diff --git a/common/TracyProtocol.hpp b/common/TracyProtocol.hpp index 597596d9..26cd611d 100644 --- a/common/TracyProtocol.hpp +++ b/common/TracyProtocol.hpp @@ -31,8 +31,8 @@ enum { WelcomeMessageProgramNameSize = 64 }; struct WelcomeMessage { double timerMul; - uint64_t initBegin; - uint64_t initEnd; + int64_t initBegin; + int64_t initEnd; uint64_t delay; uint64_t resolution; uint64_t epoch;