mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-26 16:04:34 +00:00
Avoid excessive stack operations for cpu query.
This commit is contained in:
parent
75457c1465
commit
cc8b357f09
@ -32,7 +32,7 @@ public:
|
|||||||
|
|
||||||
tracy_force_inline void lock()
|
tracy_force_inline void lock()
|
||||||
{
|
{
|
||||||
int8_t cpu;
|
uint32_t cpu;
|
||||||
const auto thread = GetThreadHandle();
|
const auto thread = GetThreadHandle();
|
||||||
{
|
{
|
||||||
Magic magic;
|
Magic magic;
|
||||||
@ -63,7 +63,7 @@ public:
|
|||||||
{
|
{
|
||||||
m_lockable.unlock();
|
m_lockable.unlock();
|
||||||
|
|
||||||
int8_t cpu;
|
uint32_t cpu;
|
||||||
Magic magic;
|
Magic magic;
|
||||||
auto& token = s_token;
|
auto& token = s_token;
|
||||||
auto item = s_queue.enqueue_begin( token, magic );
|
auto item = s_queue.enqueue_begin( token, magic );
|
||||||
@ -79,7 +79,7 @@ public:
|
|||||||
const auto ret = m_lockable.try_lock();
|
const auto ret = m_lockable.try_lock();
|
||||||
if( ret )
|
if( ret )
|
||||||
{
|
{
|
||||||
int8_t cpu;
|
uint32_t cpu;
|
||||||
Magic magic;
|
Magic magic;
|
||||||
auto& token = s_token;
|
auto& token = s_token;
|
||||||
auto item = s_queue.enqueue_begin( token, magic );
|
auto item = s_queue.enqueue_begin( token, magic );
|
||||||
|
@ -78,7 +78,7 @@ Profiler::Profiler()
|
|||||||
|
|
||||||
CalibrateTimer();
|
CalibrateTimer();
|
||||||
CalibrateDelay();
|
CalibrateDelay();
|
||||||
int8_t cpu;
|
uint32_t cpu;
|
||||||
m_timeBegin = GetTime( cpu );
|
m_timeBegin = GetTime( cpu );
|
||||||
|
|
||||||
m_thread = std::thread( [this] { Worker(); } );
|
m_thread = std::thread( [this] { Worker(); } );
|
||||||
@ -274,7 +274,7 @@ bool Profiler::HandleServerQuery()
|
|||||||
void Profiler::CalibrateTimer()
|
void Profiler::CalibrateTimer()
|
||||||
{
|
{
|
||||||
#ifdef TRACY_RDTSCP_SUPPORTED
|
#ifdef TRACY_RDTSCP_SUPPORTED
|
||||||
int8_t cpu;
|
uint32_t cpu;
|
||||||
std::atomic_signal_fence( std::memory_order_acq_rel );
|
std::atomic_signal_fence( std::memory_order_acq_rel );
|
||||||
const auto t0 = std::chrono::high_resolution_clock::now();
|
const auto t0 = std::chrono::high_resolution_clock::now();
|
||||||
const auto r0 = tracy_rdtscp( cpu );
|
const auto r0 = tracy_rdtscp( cpu );
|
||||||
@ -310,7 +310,7 @@ void Profiler::CalibrateDelay()
|
|||||||
enum { Events = Iterations * 2 }; // start + end
|
enum { Events = Iterations * 2 }; // start + end
|
||||||
static_assert( Events * 2 < QueuePrealloc, "Delay calibration loop will allocate memory in queue" );
|
static_assert( Events * 2 < QueuePrealloc, "Delay calibration loop will allocate memory in queue" );
|
||||||
|
|
||||||
int8_t cpu;
|
uint32_t cpu;
|
||||||
moodycamel::ProducerToken ptoken( s_queue );
|
moodycamel::ProducerToken ptoken( s_queue );
|
||||||
for( int i=0; i<Iterations; i++ )
|
for( int i=0; i<Iterations; i++ )
|
||||||
{
|
{
|
||||||
|
@ -43,36 +43,32 @@ public:
|
|||||||
~Profiler();
|
~Profiler();
|
||||||
|
|
||||||
#ifdef TRACY_RDTSCP_SUPPORTED
|
#ifdef TRACY_RDTSCP_SUPPORTED
|
||||||
static tracy_force_inline int64_t tracy_rdtscp( int8_t& cpu )
|
static tracy_force_inline int64_t tracy_rdtscp( uint32_t& cpu )
|
||||||
{
|
{
|
||||||
#if defined _MSC_VER || defined __CYGWIN__
|
#if defined _MSC_VER || defined __CYGWIN__
|
||||||
unsigned int ui;
|
const auto t = int64_t( __rdtscp( &cpu ) );
|
||||||
const auto t = int64_t( __rdtscp( &ui ) );
|
|
||||||
cpu = (int8_t)ui;
|
|
||||||
return t;
|
return t;
|
||||||
#elif defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
|
#elif defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
|
||||||
uint64_t eax, edx;
|
uint64_t eax, edx;
|
||||||
unsigned int ui;
|
asm volatile ( "rdtscp" : "=a" (eax), "=d" (edx), "=c" (cpu) :: );
|
||||||
asm volatile ( "rdtscp" : "=a" (eax), "=d" (edx), "=c" (ui) :: );
|
|
||||||
cpu = (int8_t)ui;
|
|
||||||
return ( edx << 32 ) + eax;
|
return ( edx << 32 ) + eax;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static tracy_force_inline int64_t GetTime( int8_t& cpu )
|
static tracy_force_inline int64_t GetTime( uint32_t& cpu )
|
||||||
{
|
{
|
||||||
#ifdef TRACY_RDTSCP_SUPPORTED
|
#ifdef TRACY_RDTSCP_SUPPORTED
|
||||||
return tracy_rdtscp( cpu );
|
return tracy_rdtscp( cpu );
|
||||||
#else
|
#else
|
||||||
cpu = -1;
|
cpu = 0xFFFFFFFF;
|
||||||
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
|
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static tracy_force_inline void FrameMark()
|
static tracy_force_inline void FrameMark()
|
||||||
{
|
{
|
||||||
int8_t cpu;
|
uint32_t cpu;
|
||||||
Magic magic;
|
Magic magic;
|
||||||
auto& token = s_token;
|
auto& token = s_token;
|
||||||
auto item = s_queue.enqueue_begin( token, magic );
|
auto item = s_queue.enqueue_begin( token, magic );
|
||||||
|
@ -32,14 +32,14 @@ struct QueueZoneBegin
|
|||||||
int64_t time;
|
int64_t time;
|
||||||
uint64_t thread;
|
uint64_t thread;
|
||||||
uint64_t srcloc; // ptr
|
uint64_t srcloc; // ptr
|
||||||
int8_t cpu;
|
uint32_t cpu;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct QueueZoneEnd
|
struct QueueZoneEnd
|
||||||
{
|
{
|
||||||
int64_t time;
|
int64_t time;
|
||||||
uint64_t thread;
|
uint64_t thread;
|
||||||
int8_t cpu;
|
uint32_t cpu;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct QueueStringTransfer
|
struct QueueStringTransfer
|
||||||
|
@ -446,7 +446,8 @@ void View::ProcessZoneBegin( const QueueZoneBegin& ev )
|
|||||||
zone->start = ev.time * m_timerMul;
|
zone->start = ev.time * m_timerMul;
|
||||||
zone->end = -1;
|
zone->end = -1;
|
||||||
zone->srcloc = ev.srcloc;
|
zone->srcloc = ev.srcloc;
|
||||||
zone->cpu_start = ev.cpu;
|
assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
|
||||||
|
zone->cpu_start = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
|
||||||
zone->text = nullptr;
|
zone->text = nullptr;
|
||||||
|
|
||||||
std::unique_lock<std::mutex> lock( m_lock );
|
std::unique_lock<std::mutex> lock( m_lock );
|
||||||
@ -464,7 +465,8 @@ void View::ProcessZoneEnd( const QueueZoneEnd& ev )
|
|||||||
assert( zone->end == -1 );
|
assert( zone->end == -1 );
|
||||||
std::unique_lock<std::mutex> lock( m_lock );
|
std::unique_lock<std::mutex> lock( m_lock );
|
||||||
zone->end = ev.time * m_timerMul;
|
zone->end = ev.time * m_timerMul;
|
||||||
zone->cpu_end = ev.cpu;
|
assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
|
||||||
|
zone->cpu_end = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
assert( zone->end >= zone->start );
|
assert( zone->end >= zone->start );
|
||||||
UpdateZone( zone );
|
UpdateZone( zone );
|
||||||
|
Loading…
Reference in New Issue
Block a user