Avoid excessive stack operations for cpu query.

This commit is contained in:
Bartosz Taudul 2017-10-10 23:21:30 +02:00
parent 75457c1465
commit cc8b357f09
5 changed files with 18 additions and 20 deletions

View File

@@ -32,7 +32,7 @@ public:
tracy_force_inline void lock() tracy_force_inline void lock()
{ {
int8_t cpu; uint32_t cpu;
const auto thread = GetThreadHandle(); const auto thread = GetThreadHandle();
{ {
Magic magic; Magic magic;
@@ -63,7 +63,7 @@ public:
{ {
m_lockable.unlock(); m_lockable.unlock();
int8_t cpu; uint32_t cpu;
Magic magic; Magic magic;
auto& token = s_token; auto& token = s_token;
auto item = s_queue.enqueue_begin( token, magic ); auto item = s_queue.enqueue_begin( token, magic );
@@ -79,7 +79,7 @@ public:
const auto ret = m_lockable.try_lock(); const auto ret = m_lockable.try_lock();
if( ret ) if( ret )
{ {
int8_t cpu; uint32_t cpu;
Magic magic; Magic magic;
auto& token = s_token; auto& token = s_token;
auto item = s_queue.enqueue_begin( token, magic ); auto item = s_queue.enqueue_begin( token, magic );

View File

@@ -78,7 +78,7 @@ Profiler::Profiler()
CalibrateTimer(); CalibrateTimer();
CalibrateDelay(); CalibrateDelay();
int8_t cpu; uint32_t cpu;
m_timeBegin = GetTime( cpu ); m_timeBegin = GetTime( cpu );
m_thread = std::thread( [this] { Worker(); } ); m_thread = std::thread( [this] { Worker(); } );
@@ -274,7 +274,7 @@ bool Profiler::HandleServerQuery()
void Profiler::CalibrateTimer() void Profiler::CalibrateTimer()
{ {
#ifdef TRACY_RDTSCP_SUPPORTED #ifdef TRACY_RDTSCP_SUPPORTED
int8_t cpu; uint32_t cpu;
std::atomic_signal_fence( std::memory_order_acq_rel ); std::atomic_signal_fence( std::memory_order_acq_rel );
const auto t0 = std::chrono::high_resolution_clock::now(); const auto t0 = std::chrono::high_resolution_clock::now();
const auto r0 = tracy_rdtscp( cpu ); const auto r0 = tracy_rdtscp( cpu );
@@ -310,7 +310,7 @@ void Profiler::CalibrateDelay()
enum { Events = Iterations * 2 }; // start + end enum { Events = Iterations * 2 }; // start + end
static_assert( Events * 2 < QueuePrealloc, "Delay calibration loop will allocate memory in queue" ); static_assert( Events * 2 < QueuePrealloc, "Delay calibration loop will allocate memory in queue" );
int8_t cpu; uint32_t cpu;
moodycamel::ProducerToken ptoken( s_queue ); moodycamel::ProducerToken ptoken( s_queue );
for( int i=0; i<Iterations; i++ ) for( int i=0; i<Iterations; i++ )
{ {

View File

@@ -43,36 +43,32 @@ public:
~Profiler(); ~Profiler();
#ifdef TRACY_RDTSCP_SUPPORTED #ifdef TRACY_RDTSCP_SUPPORTED
static tracy_force_inline int64_t tracy_rdtscp( int8_t& cpu ) static tracy_force_inline int64_t tracy_rdtscp( uint32_t& cpu )
{ {
#if defined _MSC_VER || defined __CYGWIN__ #if defined _MSC_VER || defined __CYGWIN__
unsigned int ui; const auto t = int64_t( __rdtscp( &cpu ) );
const auto t = int64_t( __rdtscp( &ui ) );
cpu = (int8_t)ui;
return t; return t;
#elif defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 #elif defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
uint64_t eax, edx; uint64_t eax, edx;
unsigned int ui; asm volatile ( "rdtscp" : "=a" (eax), "=d" (edx), "=c" (cpu) :: );
asm volatile ( "rdtscp" : "=a" (eax), "=d" (edx), "=c" (ui) :: );
cpu = (int8_t)ui;
return ( edx << 32 ) + eax; return ( edx << 32 ) + eax;
#endif #endif
} }
#endif #endif
static tracy_force_inline int64_t GetTime( int8_t& cpu ) static tracy_force_inline int64_t GetTime( uint32_t& cpu )
{ {
#ifdef TRACY_RDTSCP_SUPPORTED #ifdef TRACY_RDTSCP_SUPPORTED
return tracy_rdtscp( cpu ); return tracy_rdtscp( cpu );
#else #else
cpu = -1; cpu = 0xFFFFFFFF;
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count(); return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
#endif #endif
} }
static tracy_force_inline void FrameMark() static tracy_force_inline void FrameMark()
{ {
int8_t cpu; uint32_t cpu;
Magic magic; Magic magic;
auto& token = s_token; auto& token = s_token;
auto item = s_queue.enqueue_begin( token, magic ); auto item = s_queue.enqueue_begin( token, magic );

View File

@@ -32,14 +32,14 @@ struct QueueZoneBegin
int64_t time; int64_t time;
uint64_t thread; uint64_t thread;
uint64_t srcloc; // ptr uint64_t srcloc; // ptr
int8_t cpu; uint32_t cpu;
}; };
struct QueueZoneEnd struct QueueZoneEnd
{ {
int64_t time; int64_t time;
uint64_t thread; uint64_t thread;
int8_t cpu; uint32_t cpu;
}; };
struct QueueStringTransfer struct QueueStringTransfer

View File

@@ -446,7 +446,8 @@ void View::ProcessZoneBegin( const QueueZoneBegin& ev )
zone->start = ev.time * m_timerMul; zone->start = ev.time * m_timerMul;
zone->end = -1; zone->end = -1;
zone->srcloc = ev.srcloc; zone->srcloc = ev.srcloc;
zone->cpu_start = ev.cpu; assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
zone->cpu_start = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
zone->text = nullptr; zone->text = nullptr;
std::unique_lock<std::mutex> lock( m_lock ); std::unique_lock<std::mutex> lock( m_lock );
@@ -464,7 +465,8 @@ void View::ProcessZoneEnd( const QueueZoneEnd& ev )
assert( zone->end == -1 ); assert( zone->end == -1 );
std::unique_lock<std::mutex> lock( m_lock ); std::unique_lock<std::mutex> lock( m_lock );
zone->end = ev.time * m_timerMul; zone->end = ev.time * m_timerMul;
zone->cpu_end = ev.cpu; assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
zone->cpu_end = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
lock.unlock(); lock.unlock();
assert( zone->end >= zone->start ); assert( zone->end >= zone->start );
UpdateZone( zone ); UpdateZone( zone );