Store thread handle in a thread local variable.

This saves us a non-inlineable function call. The thread-local block is
accessed anyway, since we need to get the token, so we already have the
pointer and don't need to fetch it a second time (which is what Windows'
GetCurrentThreadId() does internally). We also no longer need to store
the thread id in ScopedZone, as that was only a micro-optimization to
avoid a second GetThreadHandle() call.

This change has a measurable effect: enqueue time drops from ~10 ns to
~8 ns.
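
The gist of the change, as a simplified sketch (this is not the exact
Tracy code; the non-Windows branch is abbreviated, and the real change
is in the diff below):

#include <stdint.h>
#ifdef _WIN32
#  include <windows.h>
#else
#  include <pthread.h>
#endif

// Queries the OS for the current thread id; not inlineable at the call site.
static inline uint64_t GetThreadHandleImpl()
{
#ifdef _WIN32
    return uint64_t( GetCurrentThreadId() );
#else
    return uint64_t( pthread_self() );
#endif
}

// Cached once per thread, on first use.
const thread_local auto s_threadHandle = GetThreadHandleImpl();

// Hot-path accessor: a plain TLS load, no OS call.
static inline uint64_t GetThreadHandle()
{
    return s_threadHandle;
}

On the ScopedZone side, each event now fetches the handle into a local
variable right before writing the queue item, instead of reading a
cached m_thread member.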

A further optimization would be to skip thread handle retrieval during
zone capture entirely and perform it only when data is retrieved from
the queue. Since each thread has its own producer ("token"), the thread
handle should be accessible during the dequeue operation. This is a much
more invasive change, as it would require a) modifying the queue, and
b) additional processing of dequeued data to inject the thread handle.
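
A rough sketch of that idea, with hypothetical names (QueueItem,
PerThreadProducer, Drain and GetThreadHandleStub are made up for
illustration and are not part of Tracy or the moodycamel queue):

#include <stdint.h>
#include <functional>
#include <thread>
#include <vector>

// Stand-in for Tracy's GetThreadHandle(), only to keep the sketch
// self-contained.
static inline uint64_t GetThreadHandleStub()
{
    return (uint64_t)std::hash<std::thread::id>{}( std::this_thread::get_id() );
}

// Hypothetical queue item: no thread field is written on the hot path.
struct QueueItem
{
    uint8_t type;
    int64_t time;
    uint64_t thread;    // filled in at dequeue time
};

// Hypothetical per-thread producer ("token"): it belongs to exactly one
// thread, so it can capture that thread's handle once, at construction.
struct PerThreadProducer
{
    uint64_t threadHandle = GetThreadHandleStub();
    std::vector<QueueItem> items;   // stand-in for the real lock-free queue

    void Enqueue( uint8_t type, int64_t time )
    {
        // Hot path: no thread handle retrieval at all.
        items.push_back( QueueItem { type, time, 0 } );
    }
};

// Consumer side: inject the producer's thread handle into every dequeued
// item; this is the "additional processing of dequeued data" mentioned
// above.
static void Drain( PerThreadProducer& producer, std::vector<QueueItem>& out )
{
    for( auto& item : producer.items )
    {
        item.thread = producer.threadHandle;
        out.push_back( item );
    }
    producer.items.clear();
}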
author Bartosz Taudul, 2019-06-24 19:19:47 +02:00
parent 46b75c5a19
commit a56c47a6a0
2 changed files with 14 additions and 8 deletions

Changed file 1 of 2:

@@ -24,9 +24,8 @@ public:
 #endif
     {
         if( !m_active ) return;
-        const auto thread = GetThreadHandle();
-        m_thread = thread;
         Magic magic;
+        const auto thread = GetThreadHandle();
         auto token = GetToken();
         auto& tail = token->get_tail_index();
         auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
@@ -53,7 +52,6 @@ public:
     {
         if( !m_active ) return;
         const auto thread = GetThreadHandle();
-        m_thread = thread;
         Magic magic;
         auto token = GetToken();
         auto& tail = token->get_tail_index();
@@ -80,6 +78,7 @@ public:
         if( GetProfiler().ConnectionId() != m_connectionId ) return;
 #endif
         Magic magic;
+        const auto thread = GetThreadHandle();
         auto token = GetToken();
         auto& tail = token->get_tail_index();
         auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
@@ -91,7 +90,7 @@ public:
         MemWrite( &item->zoneEnd.time, Profiler::GetTime( cpu ) );
         MemWrite( &item->zoneEnd.cpu, cpu );
 #endif
-        MemWrite( &item->zoneEnd.thread, m_thread );
+        MemWrite( &item->zoneEnd.thread, thread );
         tail.store( magic + 1, std::memory_order_release );
     }

@@ -102,6 +101,7 @@ public:
         if( GetProfiler().ConnectionId() != m_connectionId ) return;
 #endif
         Magic magic;
+        const auto thread = GetThreadHandle();
         auto token = GetToken();
         auto ptr = (char*)tracy_malloc( size+1 );
         memcpy( ptr, txt, size );
@@ -109,7 +109,7 @@ public:
         auto& tail = token->get_tail_index();
         auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
         MemWrite( &item->hdr.type, QueueType::ZoneText );
-        MemWrite( &item->zoneText.thread, m_thread );
+        MemWrite( &item->zoneText.thread, thread );
         MemWrite( &item->zoneText.text, (uint64_t)ptr );
         tail.store( magic + 1, std::memory_order_release );
     }
@@ -121,6 +121,7 @@ public:
         if( GetProfiler().ConnectionId() != m_connectionId ) return;
 #endif
         Magic magic;
+        const auto thread = GetThreadHandle();
         auto token = GetToken();
         auto ptr = (char*)tracy_malloc( size+1 );
         memcpy( ptr, txt, size );
@@ -128,13 +129,12 @@ public:
         auto& tail = token->get_tail_index();
         auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
         MemWrite( &item->hdr.type, QueueType::ZoneName );
-        MemWrite( &item->zoneText.thread, m_thread );
+        MemWrite( &item->zoneText.thread, thread );
         MemWrite( &item->zoneText.text, (uint64_t)ptr );
         tail.store( magic + 1, std::memory_order_release );
     }

 private:
-    uint64_t m_thread;
     const bool m_active;

 #ifdef TRACY_ON_DEMAND

Changed file 2 of 2:

@@ -24,7 +24,7 @@ namespace tracy
 {

 #ifdef TRACY_ENABLE
-static inline uint64_t GetThreadHandle()
+static inline uint64_t GetThreadHandleImpl()
 {
 #ifdef _WIN32
     static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
@@ -39,6 +39,12 @@ static inline uint64_t GetThreadHandle()
 #endif
 }

+const thread_local auto s_threadHandle = GetThreadHandleImpl();
+
+static inline uint64_t GetThreadHandle()
+{
+    return s_threadHandle;
+}
 #endif

 void SetThreadName( std::thread& thread, const char* name );