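Avoid excessive stack operations for the CPU id query: store it as uint32_t so rdtscp can write it directly, without a temporary and a narrowing cast.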

This commit is contained in:
Bartosz Taudul 2017-10-10 23:21:30 +02:00
parent 75457c1465
commit cc8b357f09
5 changed files with 18 additions and 20 deletions

View File

@ -32,7 +32,7 @@ public:
tracy_force_inline void lock()
{
int8_t cpu;
uint32_t cpu;
const auto thread = GetThreadHandle();
{
Magic magic;
@ -63,7 +63,7 @@ public:
{
m_lockable.unlock();
int8_t cpu;
uint32_t cpu;
Magic magic;
auto& token = s_token;
auto item = s_queue.enqueue_begin( token, magic );
@ -79,7 +79,7 @@ public:
const auto ret = m_lockable.try_lock();
if( ret )
{
int8_t cpu;
uint32_t cpu;
Magic magic;
auto& token = s_token;
auto item = s_queue.enqueue_begin( token, magic );

View File

@ -78,7 +78,7 @@ Profiler::Profiler()
CalibrateTimer();
CalibrateDelay();
int8_t cpu;
uint32_t cpu;
m_timeBegin = GetTime( cpu );
m_thread = std::thread( [this] { Worker(); } );
@ -274,7 +274,7 @@ bool Profiler::HandleServerQuery()
void Profiler::CalibrateTimer()
{
#ifdef TRACY_RDTSCP_SUPPORTED
int8_t cpu;
uint32_t cpu;
std::atomic_signal_fence( std::memory_order_acq_rel );
const auto t0 = std::chrono::high_resolution_clock::now();
const auto r0 = tracy_rdtscp( cpu );
@ -310,7 +310,7 @@ void Profiler::CalibrateDelay()
enum { Events = Iterations * 2 }; // start + end
static_assert( Events * 2 < QueuePrealloc, "Delay calibration loop will allocate memory in queue" );
int8_t cpu;
uint32_t cpu;
moodycamel::ProducerToken ptoken( s_queue );
for( int i=0; i<Iterations; i++ )
{

View File

@ -43,36 +43,32 @@ public:
~Profiler();
#ifdef TRACY_RDTSCP_SUPPORTED
static tracy_force_inline int64_t tracy_rdtscp( int8_t& cpu )
static tracy_force_inline int64_t tracy_rdtscp( uint32_t& cpu )
{
#if defined _MSC_VER || defined __CYGWIN__
unsigned int ui;
const auto t = int64_t( __rdtscp( &ui ) );
cpu = (int8_t)ui;
const auto t = int64_t( __rdtscp( &cpu ) );
return t;
#elif defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
uint64_t eax, edx;
unsigned int ui;
asm volatile ( "rdtscp" : "=a" (eax), "=d" (edx), "=c" (ui) :: );
cpu = (int8_t)ui;
asm volatile ( "rdtscp" : "=a" (eax), "=d" (edx), "=c" (cpu) :: );
return ( edx << 32 ) + eax;
#endif
}
#endif
static tracy_force_inline int64_t GetTime( int8_t& cpu )
static tracy_force_inline int64_t GetTime( uint32_t& cpu )
{
#ifdef TRACY_RDTSCP_SUPPORTED
return tracy_rdtscp( cpu );
#else
cpu = -1;
cpu = 0xFFFFFFFF;
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
#endif
}
static tracy_force_inline void FrameMark()
{
int8_t cpu;
uint32_t cpu;
Magic magic;
auto& token = s_token;
auto item = s_queue.enqueue_begin( token, magic );

View File

@ -32,14 +32,14 @@ struct QueueZoneBegin
int64_t time;
uint64_t thread;
uint64_t srcloc; // ptr
int8_t cpu;
uint32_t cpu;
};
struct QueueZoneEnd
{
int64_t time;
uint64_t thread;
int8_t cpu;
uint32_t cpu;
};
struct QueueStringTransfer

View File

@ -446,7 +446,8 @@ void View::ProcessZoneBegin( const QueueZoneBegin& ev )
zone->start = ev.time * m_timerMul;
zone->end = -1;
zone->srcloc = ev.srcloc;
zone->cpu_start = ev.cpu;
assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
zone->cpu_start = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
zone->text = nullptr;
std::unique_lock<std::mutex> lock( m_lock );
@ -464,7 +465,8 @@ void View::ProcessZoneEnd( const QueueZoneEnd& ev )
assert( zone->end == -1 );
std::unique_lock<std::mutex> lock( m_lock );
zone->end = ev.time * m_timerMul;
zone->cpu_end = ev.cpu;
assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
zone->cpu_end = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
lock.unlock();
assert( zone->end >= zone->start );
UpdateZone( zone );