mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-22 22:44:34 +00:00
Avoid excessive stack operations for cpu query.
This commit is contained in:
parent
75457c1465
commit
cc8b357f09
@@ -32,7 +32,7 @@ public:
|
||||
|
||||
tracy_force_inline void lock()
|
||||
{
|
||||
int8_t cpu;
|
||||
uint32_t cpu;
|
||||
const auto thread = GetThreadHandle();
|
||||
{
|
||||
Magic magic;
|
||||
@@ -63,7 +63,7 @@ public:
|
||||
{
|
||||
m_lockable.unlock();
|
||||
|
||||
int8_t cpu;
|
||||
uint32_t cpu;
|
||||
Magic magic;
|
||||
auto& token = s_token;
|
||||
auto item = s_queue.enqueue_begin( token, magic );
|
||||
@@ -79,7 +79,7 @@ public:
|
||||
const auto ret = m_lockable.try_lock();
|
||||
if( ret )
|
||||
{
|
||||
int8_t cpu;
|
||||
uint32_t cpu;
|
||||
Magic magic;
|
||||
auto& token = s_token;
|
||||
auto item = s_queue.enqueue_begin( token, magic );
|
||||
|
@@ -78,7 +78,7 @@ Profiler::Profiler()
|
||||
|
||||
CalibrateTimer();
|
||||
CalibrateDelay();
|
||||
int8_t cpu;
|
||||
uint32_t cpu;
|
||||
m_timeBegin = GetTime( cpu );
|
||||
|
||||
m_thread = std::thread( [this] { Worker(); } );
|
||||
@@ -274,7 +274,7 @@ bool Profiler::HandleServerQuery()
|
||||
void Profiler::CalibrateTimer()
|
||||
{
|
||||
#ifdef TRACY_RDTSCP_SUPPORTED
|
||||
int8_t cpu;
|
||||
uint32_t cpu;
|
||||
std::atomic_signal_fence( std::memory_order_acq_rel );
|
||||
const auto t0 = std::chrono::high_resolution_clock::now();
|
||||
const auto r0 = tracy_rdtscp( cpu );
|
||||
@@ -310,7 +310,7 @@ void Profiler::CalibrateDelay()
|
||||
enum { Events = Iterations * 2 }; // start + end
|
||||
static_assert( Events * 2 < QueuePrealloc, "Delay calibration loop will allocate memory in queue" );
|
||||
|
||||
int8_t cpu;
|
||||
uint32_t cpu;
|
||||
moodycamel::ProducerToken ptoken( s_queue );
|
||||
for( int i=0; i<Iterations; i++ )
|
||||
{
|
||||
|
@@ -43,36 +43,32 @@ public:
|
||||
~Profiler();
|
||||
|
||||
#ifdef TRACY_RDTSCP_SUPPORTED
|
||||
static tracy_force_inline int64_t tracy_rdtscp( int8_t& cpu )
|
||||
static tracy_force_inline int64_t tracy_rdtscp( uint32_t& cpu )
|
||||
{
|
||||
#if defined _MSC_VER || defined __CYGWIN__
|
||||
unsigned int ui;
|
||||
const auto t = int64_t( __rdtscp( &ui ) );
|
||||
cpu = (int8_t)ui;
|
||||
const auto t = int64_t( __rdtscp( &cpu ) );
|
||||
return t;
|
||||
#elif defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
|
||||
uint64_t eax, edx;
|
||||
unsigned int ui;
|
||||
asm volatile ( "rdtscp" : "=a" (eax), "=d" (edx), "=c" (ui) :: );
|
||||
cpu = (int8_t)ui;
|
||||
asm volatile ( "rdtscp" : "=a" (eax), "=d" (edx), "=c" (cpu) :: );
|
||||
return ( edx << 32 ) + eax;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
static tracy_force_inline int64_t GetTime( int8_t& cpu )
|
||||
static tracy_force_inline int64_t GetTime( uint32_t& cpu )
|
||||
{
|
||||
#ifdef TRACY_RDTSCP_SUPPORTED
|
||||
return tracy_rdtscp( cpu );
|
||||
#else
|
||||
cpu = -1;
|
||||
cpu = 0xFFFFFFFF;
|
||||
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
|
||||
#endif
|
||||
}
|
||||
|
||||
static tracy_force_inline void FrameMark()
|
||||
{
|
||||
int8_t cpu;
|
||||
uint32_t cpu;
|
||||
Magic magic;
|
||||
auto& token = s_token;
|
||||
auto item = s_queue.enqueue_begin( token, magic );
|
||||
|
@@ -32,14 +32,14 @@ struct QueueZoneBegin
|
||||
int64_t time;
|
||||
uint64_t thread;
|
||||
uint64_t srcloc; // ptr
|
||||
int8_t cpu;
|
||||
uint32_t cpu;
|
||||
};
|
||||
|
||||
struct QueueZoneEnd
|
||||
{
|
||||
int64_t time;
|
||||
uint64_t thread;
|
||||
int8_t cpu;
|
||||
uint32_t cpu;
|
||||
};
|
||||
|
||||
struct QueueStringTransfer
|
||||
|
@@ -446,7 +446,8 @@ void View::ProcessZoneBegin( const QueueZoneBegin& ev )
|
||||
zone->start = ev.time * m_timerMul;
|
||||
zone->end = -1;
|
||||
zone->srcloc = ev.srcloc;
|
||||
zone->cpu_start = ev.cpu;
|
||||
assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
|
||||
zone->cpu_start = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
|
||||
zone->text = nullptr;
|
||||
|
||||
std::unique_lock<std::mutex> lock( m_lock );
|
||||
@@ -464,7 +465,8 @@ void View::ProcessZoneEnd( const QueueZoneEnd& ev )
|
||||
assert( zone->end == -1 );
|
||||
std::unique_lock<std::mutex> lock( m_lock );
|
||||
zone->end = ev.time * m_timerMul;
|
||||
zone->cpu_end = ev.cpu;
|
||||
assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
|
||||
zone->cpu_end = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
|
||||
lock.unlock();
|
||||
assert( zone->end >= zone->start );
|
||||
UpdateZone( zone );
|
||||
|
Loading…
Reference in New Issue
Block a user