Sample cache and branch stats.

This commit is contained in:
Bartosz Taudul 2021-05-20 02:15:06 +02:00
parent 9ef5430c01
commit 5f3d1c0faf
No known key found for this signature in database
GPG Key ID: B7FE2008B7575DF3
2 changed files with 100 additions and 4 deletions

View File

@ -681,7 +681,11 @@ enum TraceEventId
{ {
EventCallstack, EventCallstack,
EventCpuCycles, EventCpuCycles,
EventInstructionsRetired EventInstructionsRetired,
EventCacheReference,
EventCacheMiss,
EventBranchRetired,
EventBranchMiss
}; };
static void SetupSampling( int64_t& samplingPeriod ) static void SetupSampling( int64_t& samplingPeriod )
@ -697,10 +701,24 @@ static void SetupSampling( int64_t& samplingPeriod )
const bool noRetirement = noRetirementEnv && noRetirementEnv[0] == '1'; const bool noRetirement = noRetirementEnv && noRetirementEnv[0] == '1';
#endif #endif
// Opt-out switch for cache sampling (cache reference + cache miss events).
// Defining TRACY_NO_SAMPLE_CACHE at build time forces it off; otherwise it is
// turned off at run time when the environment variable of the same name has
// '1' as its first character.
#ifdef TRACY_NO_SAMPLE_CACHE
const bool noCache = true;
#else
const char* noCacheEnv = GetEnvVar( "TRACY_NO_SAMPLE_CACHE" );
const bool noCache = noCacheEnv && noCacheEnv[0] == '1';
#endif
// Opt-out switch for branch sampling (branch retired + branch miss events),
// following the same build-time / environment scheme with
// TRACY_NO_SAMPLE_BRANCH.
#ifdef TRACY_NO_SAMPLE_BRANCH
const bool noBranch = true;
#else
const char* noBranchEnv = GetEnvVar( "TRACY_NO_SAMPLE_BRANCH" );
const bool noBranch = noBranchEnv && noBranchEnv[0] == '1';
#endif
samplingPeriod = GetSamplingPeriod(); samplingPeriod = GetSamplingPeriod();
s_numCpus = (int)std::thread::hardware_concurrency(); s_numCpus = (int)std::thread::hardware_concurrency();
s_ring = (RingBuffer<RingBufSize>*)tracy_malloc( sizeof( RingBuffer<RingBufSize> ) * s_numCpus * 3 ); s_ring = (RingBuffer<RingBufSize>*)tracy_malloc( sizeof( RingBuffer<RingBufSize> ) * s_numCpus * 7 );
s_numBuffers = 0; s_numBuffers = 0;
// Stack traces // Stack traces
@ -745,7 +763,6 @@ static void SetupSampling( int64_t& samplingPeriod )
pe.exclude_kernel = 1; pe.exclude_kernel = 1;
pe.exclude_idle = 1; pe.exclude_idle = 1;
pe.precise_ip = 2; pe.precise_ip = 2;
if( !noRetirement ) if( !noRetirement )
{ {
for( int i=0; i<s_numCpus; i++ ) for( int i=0; i<s_numCpus; i++ )
@ -761,7 +778,6 @@ static void SetupSampling( int64_t& samplingPeriod )
// Instructions retired // Instructions retired
pe.config = PERF_COUNT_HW_INSTRUCTIONS; pe.config = PERF_COUNT_HW_INSTRUCTIONS;
if( !noRetirement ) if( !noRetirement )
{ {
for( int i=0; i<s_numCpus; i++ ) for( int i=0; i<s_numCpus; i++ )
@ -775,6 +791,66 @@ static void SetupSampling( int64_t& samplingPeriod )
} }
} }
// The four sections below reuse the perf_event_attr `pe` configured earlier
// and only switch `pe.config` to a different hardware counter. For each
// section one open is attempted per index in [0, s_numCpus) (the index is
// presumably the cpu argument of perf_event_open(2) -- confirm against the
// local wrapper). A failed open (fd == -1) is skipped without reporting, so
// s_numBuffers counts only the buffers that were actually constructed.

// Cache references; skipped entirely when noCache is set.
pe.config = PERF_COUNT_HW_CACHE_REFERENCES;
if( !noCache )
{
for( int i=0; i<s_numCpus; i++ )
{
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
// Construct the ring buffer in place in the next free s_ring slot.
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCacheReference );
s_numBuffers++;
}
}
}
// Cache misses; governed by the same noCache switch as cache references.
pe.config = PERF_COUNT_HW_CACHE_MISSES;
if( !noCache )
{
for( int i=0; i<s_numCpus; i++ )
{
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCacheMiss );
s_numBuffers++;
}
}
}
// Retired branch instructions; skipped entirely when noBranch is set.
pe.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
if( !noBranch )
{
for( int i=0; i<s_numCpus; i++ )
{
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventBranchRetired );
s_numBuffers++;
}
}
}
// Branch misses; governed by the same noBranch switch as retired branches.
pe.config = PERF_COUNT_HW_BRANCH_MISSES;
if( !noBranch )
{
for( int i=0; i<s_numCpus; i++ )
{
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventBranchMiss );
s_numBuffers++;
}
}
}
s_threadSampling = (Thread*)tracy_malloc( sizeof( Thread ) ); s_threadSampling = (Thread*)tracy_malloc( sizeof( Thread ) );
new(s_threadSampling) Thread( [] (void*) { new(s_threadSampling) Thread( [] (void*) {
@ -916,6 +992,18 @@ static void SetupSampling( int64_t& samplingPeriod )
case EventInstructionsRetired: case EventInstructionsRetired:
type = QueueType::HwSampleInstructionRetired; type = QueueType::HwSampleInstructionRetired;
break; break;
// Cache and branch samples each get their own queue type so they are not
// reported as CPU-cycle / instruction-retired samples.
case EventCacheReference:
type = QueueType::HwSampleCacheReference;
break;
case EventCacheMiss:
type = QueueType::HwSampleCacheMiss;
break;
case EventBranchRetired:
type = QueueType::HwSampleBranchRetired;
break;
case EventBranchMiss:
type = QueueType::HwSampleBranchMiss;
break;
default: default:
assert( false ); assert( false );
break; break;

View File

@ -84,6 +84,10 @@ enum class QueueType : uint8_t
TidToPid, TidToPid,
HwSampleCpuCycle, HwSampleCpuCycle,
HwSampleInstructionRetired, HwSampleInstructionRetired,
HwSampleCacheReference,
HwSampleCacheMiss,
HwSampleBranchRetired,
HwSampleBranchMiss,
PlotConfig, PlotConfig,
ParamSetup, ParamSetup,
AckServerQueryNoop, AckServerQueryNoop,
@ -663,6 +667,10 @@ static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ) + sizeof( QueueTidToPid ), sizeof( QueueHeader ) + sizeof( QueueTidToPid ),
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cpu cycle sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cpu cycle
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // instruction retired sizeof( QueueHeader ) + sizeof( QueueHwSample ), // instruction retired
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cache reference
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cache miss
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // branch retired
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // branch miss
sizeof( QueueHeader ) + sizeof( QueuePlotConfig ), sizeof( QueueHeader ) + sizeof( QueuePlotConfig ),
sizeof( QueueHeader ) + sizeof( QueueParamSetup ), sizeof( QueueHeader ) + sizeof( QueueParamSetup ),
sizeof( QueueHeader ), // server query acknowledgement sizeof( QueueHeader ), // server query acknowledgement