Keep a list of buffers left to handle.

Previously a bitmap of buffers was repeatedly scanned to see which buffers
still contained data. This process needlessly wasted cycles (it showed up as a
hotspot when profiled) and, worse yet, the workload increased with the number
of CPU cores (and hence the number of buffers) to handle.

The new implementation instead maintains a list of the indices of buffers that
still have to be handled. This list does not contain empty buffers, so each loop
iteration performs some work, instead of just spinning in search of buffers to handle.
This commit is contained in:
Bartosz Taudul 2022-08-18 13:59:56 +02:00
parent 940f32c1a8
commit 197007ab47
No known key found for this signature in database
GPG Key ID: B7FE2008B7575DF3

View File

@@ -1285,7 +1285,7 @@ void SysTraceWorker( void* ptr )
const auto ctxBufNum = numBuffers - ctxBufferIdx; const auto ctxBufNum = numBuffers - ctxBufferIdx;
int activeNum = 0; int activeNum = 0;
bool active[512]; uint16_t active[512];
uint32_t end[512]; uint32_t end[512];
uint32_t pos[512]; uint32_t pos[512];
for( int i=0; i<ctxBufNum; i++ ) for( int i=0; i<ctxBufNum; i++ )
@@ -1295,9 +1295,9 @@ void SysTraceWorker( void* ptr )
const auto rbTail = ringArray[rbIdx].GetTail(); const auto rbTail = ringArray[rbIdx].GetTail();
const auto rbActive = rbHead != rbTail; const auto rbActive = rbHead != rbTail;
active[i] = rbActive;
if( rbActive ) if( rbActive )
{ {
active[activeNum] = (uint16_t)i;
activeNum++; activeNum++;
end[i] = rbHead - rbTail; end[i] = rbHead - rbTail;
pos[i] = 0; pos[i] = 0;
@@ -1313,13 +1313,14 @@ void SysTraceWorker( void* ptr )
while( activeNum > 0 ) while( activeNum > 0 )
{ {
int sel = -1; int sel = -1;
int selPos;
int64_t t0 = std::numeric_limits<int64_t>::max(); int64_t t0 = std::numeric_limits<int64_t>::max();
for( int i=0; i<ctxBufNum; i++ ) for( int i=0; i<activeNum; i++ )
{ {
if( !active[i] ) continue; auto idx = active[i];
auto rbPos = pos[i]; auto rbPos = pos[idx];
assert( rbPos < end[i] ); assert( rbPos < end[idx] );
const auto rbIdx = ctxBufferIdx + i; const auto rbIdx = ctxBufferIdx + idx;
perf_event_header hdr; perf_event_header hdr;
ringArray[rbIdx].Read( &hdr, rbPos, sizeof( perf_event_header ) ); ringArray[rbIdx].Read( &hdr, rbPos, sizeof( perf_event_header ) );
if( hdr.type == PERF_RECORD_SAMPLE ) if( hdr.type == PERF_RECORD_SAMPLE )
@@ -1329,20 +1330,22 @@ void SysTraceWorker( void* ptr )
if( rbTime < t0 ) if( rbTime < t0 )
{ {
t0 = rbTime; t0 = rbTime;
sel = i; sel = idx;
selPos = i;
} }
} }
else else
{ {
rbPos += hdr.size; rbPos += hdr.size;
if( rbPos == end[i] ) if( rbPos == end[idx] )
{ {
active[i] = false; memmove( active+i, active+i+1, sizeof(*active) * ( activeNum - i - 1 ) );
activeNum--; activeNum--;
i--;
} }
else else
{ {
pos[i] = rbPos; pos[idx] = rbPos;
} }
} }
} }
@@ -1488,7 +1491,7 @@ void SysTraceWorker( void* ptr )
rbPos += hdr.size; rbPos += hdr.size;
if( rbPos == end[sel] ) if( rbPos == end[sel] )
{ {
active[sel] = false; memmove( active+selPos, active+selPos+1, sizeof(*active) * ( activeNum - selPos - 1 ) );
activeNum--; activeNum--;
} }
else else