mirror of
https://github.com/wolfpld/tracy.git
synced 2024-12-01 17:34:36 +00:00
Sample cache and branch stats.
This commit is contained in:
parent
9ef5430c01
commit
5f3d1c0faf
@ -681,7 +681,11 @@ enum TraceEventId
|
|||||||
{
|
{
|
||||||
EventCallstack,
|
EventCallstack,
|
||||||
EventCpuCycles,
|
EventCpuCycles,
|
||||||
EventInstructionsRetired
|
EventInstructionsRetired,
|
||||||
|
EventCacheReference,
|
||||||
|
EventCacheMiss,
|
||||||
|
EventBranchRetired,
|
||||||
|
EventBranchMiss
|
||||||
};
|
};
|
||||||
|
|
||||||
static void SetupSampling( int64_t& samplingPeriod )
|
static void SetupSampling( int64_t& samplingPeriod )
|
||||||
@ -697,10 +701,24 @@ static void SetupSampling( int64_t& samplingPeriod )
|
|||||||
const bool noRetirement = noRetirementEnv && noRetirementEnv[0] == '1';
|
const bool noRetirement = noRetirementEnv && noRetirementEnv[0] == '1';
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef TRACY_NO_SAMPLE_CACHE
|
||||||
|
const bool noCache = true;
|
||||||
|
#else
|
||||||
|
const char* noCacheEnv = GetEnvVar( "TRACY_NO_SAMPLE_CACHE" );
|
||||||
|
const bool noCache = noCacheEnv && noCacheEnv[0] == '1';
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef TRACY_NO_SAMPLE_BRANCH
|
||||||
|
const bool noBranch = true;
|
||||||
|
#else
|
||||||
|
const char* noBranchEnv = GetEnvVar( "TRACY_NO_SAMPLE_BRANCH" );
|
||||||
|
const bool noBranch = noBranchEnv && noBranchEnv[0] == '1';
|
||||||
|
#endif
|
||||||
|
|
||||||
samplingPeriod = GetSamplingPeriod();
|
samplingPeriod = GetSamplingPeriod();
|
||||||
|
|
||||||
s_numCpus = (int)std::thread::hardware_concurrency();
|
s_numCpus = (int)std::thread::hardware_concurrency();
|
||||||
s_ring = (RingBuffer<RingBufSize>*)tracy_malloc( sizeof( RingBuffer<RingBufSize> ) * s_numCpus * 3 );
|
s_ring = (RingBuffer<RingBufSize>*)tracy_malloc( sizeof( RingBuffer<RingBufSize> ) * s_numCpus * 7 );
|
||||||
s_numBuffers = 0;
|
s_numBuffers = 0;
|
||||||
|
|
||||||
// Stack traces
|
// Stack traces
|
||||||
@ -745,7 +763,6 @@ static void SetupSampling( int64_t& samplingPeriod )
|
|||||||
pe.exclude_kernel = 1;
|
pe.exclude_kernel = 1;
|
||||||
pe.exclude_idle = 1;
|
pe.exclude_idle = 1;
|
||||||
pe.precise_ip = 2;
|
pe.precise_ip = 2;
|
||||||
|
|
||||||
if( !noRetirement )
|
if( !noRetirement )
|
||||||
{
|
{
|
||||||
for( int i=0; i<s_numCpus; i++ )
|
for( int i=0; i<s_numCpus; i++ )
|
||||||
@ -761,7 +778,6 @@ static void SetupSampling( int64_t& samplingPeriod )
|
|||||||
|
|
||||||
// Instructions retired
|
// Instructions retired
|
||||||
pe.config = PERF_COUNT_HW_INSTRUCTIONS;
|
pe.config = PERF_COUNT_HW_INSTRUCTIONS;
|
||||||
|
|
||||||
if( !noRetirement )
|
if( !noRetirement )
|
||||||
{
|
{
|
||||||
for( int i=0; i<s_numCpus; i++ )
|
for( int i=0; i<s_numCpus; i++ )
|
||||||
@ -775,6 +791,66 @@ static void SetupSampling( int64_t& samplingPeriod )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cache reference
|
||||||
|
pe.config = PERF_COUNT_HW_CACHE_REFERENCES;
|
||||||
|
if( !noCache )
|
||||||
|
{
|
||||||
|
for( int i=0; i<s_numCpus; i++ )
|
||||||
|
{
|
||||||
|
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
|
||||||
|
if( fd != -1 )
|
||||||
|
{
|
||||||
|
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCacheReference );
|
||||||
|
s_numBuffers++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// cache miss
|
||||||
|
pe.config = PERF_COUNT_HW_CACHE_MISSES;
|
||||||
|
if( !noCache )
|
||||||
|
{
|
||||||
|
for( int i=0; i<s_numCpus; i++ )
|
||||||
|
{
|
||||||
|
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
|
||||||
|
if( fd != -1 )
|
||||||
|
{
|
||||||
|
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCacheMiss );
|
||||||
|
s_numBuffers++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// branch retired
|
||||||
|
pe.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
|
||||||
|
if( !noBranch )
|
||||||
|
{
|
||||||
|
for( int i=0; i<s_numCpus; i++ )
|
||||||
|
{
|
||||||
|
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
|
||||||
|
if( fd != -1 )
|
||||||
|
{
|
||||||
|
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventBranchRetired );
|
||||||
|
s_numBuffers++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// branch miss
|
||||||
|
pe.config = PERF_COUNT_HW_BRANCH_MISSES;
|
||||||
|
if( !noBranch )
|
||||||
|
{
|
||||||
|
for( int i=0; i<s_numCpus; i++ )
|
||||||
|
{
|
||||||
|
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
|
||||||
|
if( fd != -1 )
|
||||||
|
{
|
||||||
|
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventBranchMiss );
|
||||||
|
s_numBuffers++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
s_threadSampling = (Thread*)tracy_malloc( sizeof( Thread ) );
|
s_threadSampling = (Thread*)tracy_malloc( sizeof( Thread ) );
|
||||||
new(s_threadSampling) Thread( [] (void*) {
|
new(s_threadSampling) Thread( [] (void*) {
|
||||||
@ -916,6 +992,18 @@ static void SetupSampling( int64_t& samplingPeriod )
|
|||||||
case EventInstructionsRetired:
|
case EventInstructionsRetired:
|
||||||
type = QueueType::HwSampleInstructionRetired;
|
type = QueueType::HwSampleInstructionRetired;
|
||||||
break;
|
break;
|
||||||
|
case EventCacheReference:  // buffers opened with PERF_COUNT_HW_CACHE_REFERENCES
|
||||||
|
type = QueueType::HwSampleCacheReference;  // dedicated queue type, not the CPU-cycle one
|
||||||
|
break;
|
||||||
|
case EventCacheMiss:  // buffers opened with PERF_COUNT_HW_CACHE_MISSES
|
||||||
|
type = QueueType::HwSampleCacheMiss;  // dedicated queue type, not the instruction-retired one
|
||||||
|
break;
|
||||||
|
case EventBranchRetired:  // buffers opened with PERF_COUNT_HW_BRANCH_INSTRUCTIONS
|
||||||
|
type = QueueType::HwSampleBranchRetired;  // dedicated queue type, not the CPU-cycle one
|
||||||
|
break;
|
||||||
|
case EventBranchMiss:  // buffers opened with PERF_COUNT_HW_BRANCH_MISSES
|
||||||
|
type = QueueType::HwSampleBranchMiss;  // dedicated queue type, not the instruction-retired one
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
assert( false );
|
assert( false );
|
||||||
break;
|
break;
|
||||||
|
@ -84,6 +84,10 @@ enum class QueueType : uint8_t
|
|||||||
TidToPid,
|
TidToPid,
|
||||||
HwSampleCpuCycle,
|
HwSampleCpuCycle,
|
||||||
HwSampleInstructionRetired,
|
HwSampleInstructionRetired,
|
||||||
|
HwSampleCacheReference,
|
||||||
|
HwSampleCacheMiss,
|
||||||
|
HwSampleBranchRetired,
|
||||||
|
HwSampleBranchMiss,
|
||||||
PlotConfig,
|
PlotConfig,
|
||||||
ParamSetup,
|
ParamSetup,
|
||||||
AckServerQueryNoop,
|
AckServerQueryNoop,
|
||||||
@ -663,6 +667,10 @@ static constexpr size_t QueueDataSize[] = {
|
|||||||
sizeof( QueueHeader ) + sizeof( QueueTidToPid ),
|
sizeof( QueueHeader ) + sizeof( QueueTidToPid ),
|
||||||
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cpu cycle
|
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cpu cycle
|
||||||
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // instruction retired
|
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // instruction retired
|
||||||
|
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cache reference
|
||||||
|
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cache miss
|
||||||
|
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // branch retired
|
||||||
|
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // branch miss
|
||||||
sizeof( QueueHeader ) + sizeof( QueuePlotConfig ),
|
sizeof( QueueHeader ) + sizeof( QueuePlotConfig ),
|
||||||
sizeof( QueueHeader ) + sizeof( QueueParamSetup ),
|
sizeof( QueueHeader ) + sizeof( QueueParamSetup ),
|
||||||
sizeof( QueueHeader ), // server query acknowledgement
|
sizeof( QueueHeader ), // server query acknowledgement
|
||||||
|
Loading…
Reference in New Issue
Block a user