Add no-cpu GetTime() variant.

In this version the address of the CPU output variable is constant, so
no stack address calculation is involved.
This commit is contained in:
Bartosz Taudul 2017-10-29 16:12:16 +01:00
parent 68f5a17bca
commit a220043114
3 changed files with 41 additions and 29 deletions

View File

@ -28,7 +28,6 @@ public:
tracy_force_inline void lock()
{
uint32_t cpu;
const auto thread = GetThreadHandle();
{
Magic magic;
@ -38,7 +37,7 @@ public:
item->hdr.type = QueueType::LockWait;
item->lockWait.id = m_id;
item->lockWait.thread = thread;
item->lockWait.time = Profiler::GetTime( cpu );
item->lockWait.time = Profiler::GetTime();
item->lockWait.lckloc = m_lckloc;
tail.store( magic + 1, std::memory_order_release );
}
@ -53,7 +52,7 @@ public:
item->hdr.type = QueueType::LockObtain;
item->lockObtain.id = m_id;
item->lockObtain.thread = thread;
item->lockObtain.time = Profiler::GetTime( cpu );
item->lockObtain.time = Profiler::GetTime();
tail.store( magic + 1, std::memory_order_release );
}
}
@ -62,7 +61,6 @@ public:
{
m_lockable.unlock();
uint32_t cpu;
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
@ -70,7 +68,7 @@ public:
item->hdr.type = QueueType::LockRelease;
item->lockRelease.id = m_id;
item->lockRelease.thread = GetThreadHandle();
item->lockRelease.time = Profiler::GetTime( cpu );
item->lockRelease.time = Profiler::GetTime();
tail.store( magic + 1, std::memory_order_release );
}
@ -79,7 +77,6 @@ public:
const auto ret = m_lockable.try_lock();
if( ret )
{
uint32_t cpu;
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
@ -87,7 +84,7 @@ public:
item->hdr.type = QueueType::LockObtain;
item->lockObtain.id = (uint64_t)&m_lockable;
item->lockObtain.thread = GetThreadHandle();
item->lockObtain.time = Profiler::GetTime( cpu );
item->lockObtain.time = Profiler::GetTime();
tail.store( magic + 1, std::memory_order_release );
}
return ret;

View File

@ -73,7 +73,6 @@ enum { QueuePrealloc = 256 * 1024 };
static Profiler* s_instance = nullptr;
static Thread* s_thread = nullptr;
static unsigned int __dontcare_cpu;
// 1a. But s_queue is needed for initialization of variables in point 2.
extern moodycamel::ConcurrentQueue<QueueItem> s_queue;
@ -90,7 +89,7 @@ thread_local ProducerWrapper init_order(108) s_token { s_queue.get_explicit_prod
# pragma init_seg( ".CRT$XCB" )
#endif
static InitTimeWrapper init_order(101) s_initTime { Profiler::GetTime( __dontcare_cpu ) };
static InitTimeWrapper init_order(101) s_initTime { Profiler::GetTime() };
static RPMallocInit init_order(102) s_rpmalloc_init;
moodycamel::ConcurrentQueue<QueueItem> init_order(103) s_queue( QueuePrealloc );
std::atomic<uint32_t> init_order(104) s_lockCounter( 0 );
@ -123,8 +122,7 @@ Profiler::Profiler()
new(s_thread) Thread( LaunchWorker, this );
SetThreadName( s_thread->Handle(), "Tracy Profiler" );
uint32_t cpu;
m_timeBegin.store( GetTime( cpu ), std::memory_order_relaxed );
m_timeBegin.store( GetTime(), std::memory_order_relaxed );
}
Profiler::~Profiler()
@ -406,7 +404,6 @@ void Profiler::CalibrateDelay()
enum { Events = Iterations * 2 }; // start + end
static_assert( Events * 2 < QueuePrealloc, "Delay calibration loop will allocate memory in queue" );
uint32_t cpu;
moodycamel::ProducerToken ptoken_detail( s_queue );
moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptoken = s_queue.get_explicit_producer( ptoken_detail );
for( int i=0; i<Iterations; i++ )
@ -432,13 +429,13 @@ void Profiler::CalibrateDelay()
tail.store( magic + 1, std::memory_order_release );
}
}
const auto f0 = GetTime( cpu );
const auto f0 = GetTime();
for( int i=0; i<Iterations; i++ )
{
static const tracy::SourceLocation __tracy_source_location { __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 };
FakeZone ___tracy_scoped_zone( &__tracy_source_location );
}
const auto t0 = GetTime( cpu );
const auto t0 = GetTime();
for( int i=0; i<Iterations; i++ )
{
static const tracy::SourceLocation __tracy_source_location { __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 };
@ -462,7 +459,7 @@ void Profiler::CalibrateDelay()
tail.store( magic + 1, std::memory_order_release );
}
}
const auto t1 = GetTime( cpu );
const auto t1 = GetTime();
const auto dt = t1 - t0;
const auto df = t0 - f0;
m_delay = ( dt - df ) / Events;
@ -470,8 +467,8 @@ void Profiler::CalibrateDelay()
auto mindiff = std::numeric_limits<int64_t>::max();
for( int i=0; i<Iterations * 10; i++ )
{
const auto t0 = GetTime( cpu );
const auto t1 = GetTime( cpu );
const auto t0 = GetTime();
const auto t1 = GetTime();
const auto dt = t1 - t0;
if( dt > 0 && dt < mindiff ) mindiff = dt;
}

View File

@ -61,6 +61,21 @@ public:
}
#endif
#ifdef TRACY_RDTSCP_SUPPORTED
// Reads the processor time-stamp counter with the RDTSCP instruction and
// returns it as a signed 64-bit value. The auxiliary value that RDTSCP also
// produces is discarded.
static tracy_force_inline int64_t tracy_rdtscp()
{
#if defined _MSC_VER || defined __CYGWIN__
    // Static sink for the intrinsic's auxiliary output: its address is a
    // constant, so no stack address has to be computed for the call.
    static unsigned int dontcare;
    const auto t = int64_t( __rdtscp( &dontcare ) );
    return t;
#elif defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
    uint32_t eax, edx;
    // RDTSCP writes the low half to EAX, the high half to EDX and IA32_TSC_AUX
    // to ECX. ECX is not an output here, so it must be listed as clobbered;
    // otherwise the compiler may keep a live value in it across the asm.
    asm volatile ( "rdtscp" : "=a" (eax), "=d" (edx) :: "ecx" );
    return ( uint64_t( edx ) << 32 ) + uint64_t( eax );
#endif
}
#endif
static tracy_force_inline int64_t GetTime( uint32_t& cpu )
{
#ifdef TRACY_RDTSCP_SUPPORTED
@ -71,28 +86,35 @@ public:
#endif
}
// Returns the current timestamp without reporting a CPU number.
// With TRACY_RDTSCP_SUPPORTED defined this is the value of tracy_rdtscp();
// otherwise it is the high_resolution_clock time since epoch in nanoseconds.
static tracy_force_inline int64_t GetTime()
{
#ifdef TRACY_RDTSCP_SUPPORTED
    return tracy_rdtscp();
#else
    const auto sinceEpoch = std::chrono::high_resolution_clock::now().time_since_epoch();
    const auto asNanoseconds = std::chrono::duration_cast<std::chrono::nanoseconds>( sinceEpoch );
    return asNanoseconds.count();
#endif
}
// Enqueues a FrameMarkMsg queue item stamped with the current time, using the
// producer token held in s_token.
// NOTE(review): `uint32_t cpu` and the `GetTime( cpu )` assignment appear to be
// the pre-change lines of this diff shown next to their replacement; only the
// argument-less GetTime() call is presumably meant to remain -- confirm.
static tracy_force_inline void FrameMark()
{
uint32_t cpu;
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
// Begin an enqueue on the producer; `magic` is filled in by enqueue_begin and
// is used below to compute the new tail index.
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
item->hdr.type = QueueType::FrameMarkMsg;
item->frameMark.time = GetTime( cpu );
// This assignment overwrites the timestamp written on the previous line.
item->frameMark.time = GetTime();
// Store magic + 1 into the tail index with release ordering, after the item's
// fields have been written.
tail.store( magic + 1, std::memory_order_release );
}
static tracy_force_inline void PlotData( const char* name, int64_t val )
{
uint32_t cpu;
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
item->hdr.type = QueueType::PlotData;
item->plotData.name = (uint64_t)name;
item->plotData.time = GetTime( cpu );
item->plotData.time = GetTime();
item->plotData.type = PlotDataType::Int;
item->plotData.data.i = val;
tail.store( magic + 1, std::memory_order_release );
@ -100,14 +122,13 @@ public:
static tracy_force_inline void PlotData( const char* name, float val )
{
uint32_t cpu;
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
item->hdr.type = QueueType::PlotData;
item->plotData.name = (uint64_t)name;
item->plotData.time = GetTime( cpu );
item->plotData.time = GetTime();
item->plotData.type = PlotDataType::Float;
item->plotData.data.f = val;
tail.store( magic + 1, std::memory_order_release );
@ -115,14 +136,13 @@ public:
static tracy_force_inline void PlotData( const char* name, double val )
{
uint32_t cpu;
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
item->hdr.type = QueueType::PlotData;
item->plotData.name = (uint64_t)name;
item->plotData.time = GetTime( cpu );
item->plotData.time = GetTime();
item->plotData.type = PlotDataType::Double;
item->plotData.data.d = val;
tail.store( magic + 1, std::memory_order_release );
@ -130,7 +150,6 @@ public:
static tracy_force_inline void Message( const char* txt, size_t size )
{
uint32_t cpu;
Magic magic;
auto& token = s_token.ptr;
auto ptr = (char*)tracy_malloc( size+1 );
@ -139,7 +158,7 @@ public:
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
item->hdr.type = QueueType::Message;
item->message.time = GetTime( cpu );
item->message.time = GetTime();
item->message.thread = GetThreadHandle();
item->message.text = (uint64_t)ptr;
tail.store( magic + 1, std::memory_order_release );
@ -147,13 +166,12 @@ public:
static tracy_force_inline void Message( const char* txt )
{
uint32_t cpu;
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
item->hdr.type = QueueType::MessageLiteral;
item->message.time = GetTime( cpu );
item->message.time = GetTime();
item->message.thread = GetThreadHandle();
item->message.text = (uint64_t)txt;
tail.store( magic + 1, std::memory_order_release );