Restrict perf to return events only for the current PID.

This commit is contained in:
Bartosz Taudul 2021-05-23 18:53:09 +02:00
parent 4ad6f682c8
commit bbd1c4505c
No known key found for this signature in database
GPG Key ID: B7FE2008B7575DF3

View File

@ -689,13 +689,13 @@ enum TraceEventId
EventBranchMiss EventBranchMiss
}; };
static void ProbePreciseIp( perf_event_attr& pe, unsigned long long config0, unsigned long long config1 ) static void ProbePreciseIp( perf_event_attr& pe, unsigned long long config0, unsigned long long config1, pid_t pid )
{ {
pe.config = config1; pe.config = config1;
pe.precise_ip = 3; pe.precise_ip = 3;
while( pe.precise_ip != 0 ) while( pe.precise_ip != 0 )
{ {
const int fd = perf_event_open( &pe, -1, 0, -1, PERF_FLAG_FD_CLOEXEC ); const int fd = perf_event_open( &pe, pid, 0, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 ) if( fd != -1 )
{ {
close( fd ); close( fd );
@ -706,7 +706,7 @@ static void ProbePreciseIp( perf_event_attr& pe, unsigned long long config0, uns
pe.config = config0; pe.config = config0;
while( pe.precise_ip != 0 ) while( pe.precise_ip != 0 )
{ {
const int fd = perf_event_open( &pe, -1, 0, -1, PERF_FLAG_FD_CLOEXEC ); const int fd = perf_event_open( &pe, pid, 0, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 ) if( fd != -1 )
{ {
close( fd ); close( fd );
@ -745,6 +745,7 @@ static void SetupSampling( int64_t& samplingPeriod )
#endif #endif
samplingPeriod = GetSamplingPeriod(); samplingPeriod = GetSamplingPeriod();
uint32_t currentPid = (uint32_t)getpid();
s_numCpus = (int)std::thread::hardware_concurrency(); s_numCpus = (int)std::thread::hardware_concurrency();
s_ring = (RingBuffer<RingBufSize>*)tracy_malloc( sizeof( RingBuffer<RingBufSize> ) * s_numCpus * 7 ); s_ring = (RingBuffer<RingBufSize>*)tracy_malloc( sizeof( RingBuffer<RingBufSize> ) * s_numCpus * 7 );
@ -763,6 +764,7 @@ static void SetupSampling( int64_t& samplingPeriod )
pe.exclude_callchain_kernel = 1; pe.exclude_callchain_kernel = 1;
pe.disabled = 1; pe.disabled = 1;
pe.freq = 1; pe.freq = 1;
pe.inherit = 1;
#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) #if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
pe.use_clockid = 1; pe.use_clockid = 1;
pe.clockid = CLOCK_MONOTONIC_RAW; pe.clockid = CLOCK_MONOTONIC_RAW;
@ -770,7 +772,7 @@ static void SetupSampling( int64_t& samplingPeriod )
for( int i=0; i<s_numCpus; i++ ) for( int i=0; i<s_numCpus; i++ )
{ {
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC ); const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd == -1 ) if( fd == -1 )
{ {
for( int j=0; j<s_numBuffers; j++ ) s_ring[j].~RingBuffer<RingBufSize>(); for( int j=0; j<s_numBuffers; j++ ) s_ring[j].~RingBuffer<RingBufSize>();
@ -792,13 +794,14 @@ static void SetupSampling( int64_t& samplingPeriod )
pe.exclude_guest = 1; pe.exclude_guest = 1;
pe.exclude_hv = 1; pe.exclude_hv = 1;
pe.freq = 1; pe.freq = 1;
pe.inherit = 1;
if( !noRetirement ) if( !noRetirement )
{ {
TracyDebug( "Setup sampling cycles + retirement\n" ); TracyDebug( "Setup sampling cycles + retirement\n" );
ProbePreciseIp( pe, PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS ); ProbePreciseIp( pe, PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS, currentPid );
for( int i=0; i<s_numCpus; i++ ) for( int i=0; i<s_numCpus; i++ )
{ {
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC ); const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 ) if( fd != -1 )
{ {
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCpuCycles ); new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCpuCycles );
@ -810,7 +813,7 @@ static void SetupSampling( int64_t& samplingPeriod )
pe.config = PERF_COUNT_HW_INSTRUCTIONS; pe.config = PERF_COUNT_HW_INSTRUCTIONS;
for( int i=0; i<s_numCpus; i++ ) for( int i=0; i<s_numCpus; i++ )
{ {
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC ); const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 ) if( fd != -1 )
{ {
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventInstructionsRetired ); new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventInstructionsRetired );
@ -824,10 +827,10 @@ static void SetupSampling( int64_t& samplingPeriod )
if( !noCache ) if( !noCache )
{ {
TracyDebug( "Setup sampling CPU cache references + misses\n" ); TracyDebug( "Setup sampling CPU cache references + misses\n" );
ProbePreciseIp( pe, PERF_COUNT_HW_CACHE_REFERENCES, PERF_COUNT_HW_CACHE_MISSES ); ProbePreciseIp( pe, PERF_COUNT_HW_CACHE_REFERENCES, PERF_COUNT_HW_CACHE_MISSES, currentPid );
for( int i=0; i<s_numCpus; i++ ) for( int i=0; i<s_numCpus; i++ )
{ {
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC ); const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 ) if( fd != -1 )
{ {
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCacheReference ); new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCacheReference );
@ -839,7 +842,7 @@ static void SetupSampling( int64_t& samplingPeriod )
pe.config = PERF_COUNT_HW_CACHE_MISSES; pe.config = PERF_COUNT_HW_CACHE_MISSES;
for( int i=0; i<s_numCpus; i++ ) for( int i=0; i<s_numCpus; i++ )
{ {
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC ); const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 ) if( fd != -1 )
{ {
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCacheMiss ); new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCacheMiss );
@ -853,10 +856,10 @@ static void SetupSampling( int64_t& samplingPeriod )
if( !noBranch ) if( !noBranch )
{ {
TracyDebug( "Setup sampling CPU branch retirements + misses\n" ); TracyDebug( "Setup sampling CPU branch retirements + misses\n" );
ProbePreciseIp( pe, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, PERF_COUNT_HW_BRANCH_MISSES ); ProbePreciseIp( pe, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, PERF_COUNT_HW_BRANCH_MISSES, currentPid );
for( int i=0; i<s_numCpus; i++ ) for( int i=0; i<s_numCpus; i++ )
{ {
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC ); const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 ) if( fd != -1 )
{ {
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventBranchRetired ); new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventBranchRetired );
@ -868,7 +871,7 @@ static void SetupSampling( int64_t& samplingPeriod )
pe.config = PERF_COUNT_HW_BRANCH_MISSES; pe.config = PERF_COUNT_HW_BRANCH_MISSES;
for( int i=0; i<s_numCpus; i++ ) for( int i=0; i<s_numCpus; i++ )
{ {
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC ); const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 ) if( fd != -1 )
{ {
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventBranchMiss ); new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventBranchMiss );