This reverts commit aec6a04b8e99b42eca431fc0b56947937d3a14c2. (google/benchmark still at hash 1576991177ba97a4b2ff6c45950f1fa6e9aa678c as it was in #83488. Also reapplied same extra local diffs) Verified locally.
283 lines
9.4 KiB
C++
283 lines
9.4 KiB
C++
// Copyright 2021 Google Inc. All rights reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include "perf_counters.h"
|
|
|
|
#include <cstring>
|
|
#include <memory>
|
|
#include <vector>
|
|
|
|
#if defined HAVE_LIBPFM
|
|
#include "perfmon/pfmlib.h"
|
|
#include "perfmon/pfmlib_perf_event.h"
|
|
#endif
|
|
|
|
namespace benchmark {
|
|
namespace internal {
|
|
|
|
constexpr size_t PerfCounterValues::kMaxCounters;
|
|
|
|
#if defined HAVE_LIBPFM
|
|
|
|
size_t PerfCounterValues::Read(const std::vector<int>& leaders) {
|
|
// Create a pointer for multiple reads
|
|
const size_t bufsize = values_.size() * sizeof(values_[0]);
|
|
char* ptr = reinterpret_cast<char*>(values_.data());
|
|
size_t size = bufsize;
|
|
for (int lead : leaders) {
|
|
auto read_bytes = ::read(lead, ptr, size);
|
|
if (read_bytes >= ssize_t(sizeof(uint64_t))) {
|
|
// Actual data bytes are all bytes minus initial padding
|
|
std::size_t data_bytes = read_bytes - sizeof(uint64_t);
|
|
// This should be very cheap since it's in hot cache
|
|
std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes);
|
|
// Increment our counters
|
|
ptr += data_bytes;
|
|
size -= data_bytes;
|
|
} else {
|
|
int err = errno;
|
|
GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err
|
|
<< " " << ::strerror(err) << "\n";
|
|
return 0;
|
|
}
|
|
}
|
|
return (bufsize - size) / sizeof(uint64_t);
|
|
}
|
|
|
|
const bool PerfCounters::kSupported = true;
|
|
|
|
// Initializes libpfm only on the first call. Returns whether that single
|
|
// initialization was successful.
|
|
bool PerfCounters::Initialize() {
|
|
// Function-scope static gets initialized only once on first call.
|
|
static const bool success = []() {
|
|
return pfm_initialize() == PFM_SUCCESS;
|
|
}();
|
|
return success;
|
|
}
|
|
|
|
bool PerfCounters::IsCounterSupported(const std::string& name) {
|
|
Initialize();
|
|
perf_event_attr_t attr;
|
|
std::memset(&attr, 0, sizeof(attr));
|
|
pfm_perf_encode_arg_t arg;
|
|
std::memset(&arg, 0, sizeof(arg));
|
|
arg.attr = &attr;
|
|
const int mode = PFM_PLM3; // user mode only
|
|
int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT,
|
|
&arg);
|
|
return (ret == PFM_SUCCESS);
|
|
}
|
|
|
|
PerfCounters PerfCounters::Create(
|
|
const std::vector<std::string>& counter_names) {
|
|
if (!counter_names.empty()) {
|
|
Initialize();
|
|
}
|
|
|
|
// Valid counters will populate these arrays but we start empty
|
|
std::vector<std::string> valid_names;
|
|
std::vector<int> counter_ids;
|
|
std::vector<int> leader_ids;
|
|
|
|
// Resize to the maximum possible
|
|
valid_names.reserve(counter_names.size());
|
|
counter_ids.reserve(counter_names.size());
|
|
|
|
const int kCounterMode = PFM_PLM3; // user mode only
|
|
|
|
// Group leads will be assigned on demand. The idea is that once we cannot
|
|
// create a counter descriptor, the reason is that this group has maxed out
|
|
// so we set the group_id again to -1 and retry - giving the algorithm a
|
|
// chance to create a new group leader to hold the next set of counters.
|
|
int group_id = -1;
|
|
|
|
// Loop through all performance counters
|
|
for (size_t i = 0; i < counter_names.size(); ++i) {
|
|
// we are about to push into the valid names vector
|
|
// check if we did not reach the maximum
|
|
if (valid_names.size() == PerfCounterValues::kMaxCounters) {
|
|
// Log a message if we maxed out and stop adding
|
|
GetErrorLogInstance()
|
|
<< counter_names.size() << " counters were requested. The maximum is "
|
|
<< PerfCounterValues::kMaxCounters << " and " << valid_names.size()
|
|
<< " were already added. All remaining counters will be ignored\n";
|
|
// stop the loop and return what we have already
|
|
break;
|
|
}
|
|
|
|
// Check if this name is empty
|
|
const auto& name = counter_names[i];
|
|
if (name.empty()) {
|
|
GetErrorLogInstance()
|
|
<< "A performance counter name was the empty string\n";
|
|
continue;
|
|
}
|
|
|
|
// Here first means first in group, ie the group leader
|
|
const bool is_first = (group_id < 0);
|
|
|
|
// This struct will be populated by libpfm from the counter string
|
|
// and then fed into the syscall perf_event_open
|
|
struct perf_event_attr attr {};
|
|
attr.size = sizeof(attr);
|
|
|
|
// This is the input struct to libpfm.
|
|
pfm_perf_encode_arg_t arg{};
|
|
arg.attr = &attr;
|
|
const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
|
|
PFM_OS_PERF_EVENT, &arg);
|
|
if (pfm_get != PFM_SUCCESS) {
|
|
GetErrorLogInstance()
|
|
<< "Unknown performance counter name: " << name << "\n";
|
|
continue;
|
|
}
|
|
|
|
// We then proceed to populate the remaining fields in our attribute struct
|
|
// Note: the man page for perf_event_create suggests inherit = true and
|
|
// read_format = PERF_FORMAT_GROUP don't work together, but that's not the
|
|
// case.
|
|
attr.disabled = is_first;
|
|
attr.inherit = true;
|
|
attr.pinned = is_first;
|
|
attr.exclude_kernel = true;
|
|
attr.exclude_user = false;
|
|
attr.exclude_hv = true;
|
|
|
|
// Read all counters in a group in one read.
|
|
attr.read_format = PERF_FORMAT_GROUP;
|
|
|
|
int id = -1;
|
|
while (id < 0) {
|
|
static constexpr size_t kNrOfSyscallRetries = 5;
|
|
// Retry syscall as it was interrupted often (b/64774091).
|
|
for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
|
|
++num_retries) {
|
|
id = perf_event_open(&attr, 0, -1, group_id, 0);
|
|
if (id >= 0 || errno != EINTR) {
|
|
break;
|
|
}
|
|
}
|
|
if (id < 0) {
|
|
// If the file descriptor is negative we might have reached a limit
|
|
// in the current group. Set the group_id to -1 and retry
|
|
if (group_id >= 0) {
|
|
// Create a new group
|
|
group_id = -1;
|
|
} else {
|
|
// At this point we have already retried to set a new group id and
|
|
// failed. We then give up.
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// We failed to get a new file descriptor. We might have reached a hard
|
|
// hardware limit that cannot be resolved even with group multiplexing
|
|
if (id < 0) {
|
|
GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor "
|
|
"for performance counter "
|
|
<< name << ". Ignoring\n";
|
|
|
|
// We give up on this counter but try to keep going
|
|
// as the others would be fine
|
|
continue;
|
|
}
|
|
if (group_id < 0) {
|
|
// This is a leader, store and assign it to the current file descriptor
|
|
leader_ids.push_back(id);
|
|
group_id = id;
|
|
}
|
|
// This is a valid counter, add it to our descriptor's list
|
|
counter_ids.push_back(id);
|
|
valid_names.push_back(name);
|
|
}
|
|
|
|
// Loop through all group leaders activating them
|
|
// There is another option of starting ALL counters in a process but
|
|
// that would be far reaching an intrusion. If the user is using PMCs
|
|
// by themselves then this would have a side effect on them. It is
|
|
// friendlier to loop through all groups individually.
|
|
for (int lead : leader_ids) {
|
|
if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
|
|
// This should never happen but if it does, we give up on the
|
|
// entire batch as recovery would be a mess.
|
|
GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
|
|
"Claring out all counters.\n";
|
|
|
|
// Close all peformance counters
|
|
for (int id : counter_ids) {
|
|
::close(id);
|
|
}
|
|
|
|
// Return an empty object so our internal state is still good and
|
|
// the process can continue normally without impact
|
|
return NoCounters();
|
|
}
|
|
}
|
|
|
|
return PerfCounters(std::move(valid_names), std::move(counter_ids),
|
|
std::move(leader_ids));
|
|
}
|
|
|
|
void PerfCounters::CloseCounters() const {
|
|
if (counter_ids_.empty()) {
|
|
return;
|
|
}
|
|
for (int lead : leader_ids_) {
|
|
ioctl(lead, PERF_EVENT_IOC_DISABLE);
|
|
}
|
|
for (int fd : counter_ids_) {
|
|
close(fd);
|
|
}
|
|
}
|
|
#else // defined HAVE_LIBPFM
|
|
size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; }
|
|
|
|
const bool PerfCounters::kSupported = false;
|
|
|
|
bool PerfCounters::Initialize() { return false; }
|
|
|
|
bool PerfCounters::IsCounterSupported(const std::string&) { return false; }
|
|
|
|
PerfCounters PerfCounters::Create(
|
|
const std::vector<std::string>& counter_names) {
|
|
if (!counter_names.empty()) {
|
|
GetErrorLogInstance() << "Performance counters not supported.\n";
|
|
}
|
|
return NoCounters();
|
|
}
|
|
|
|
void PerfCounters::CloseCounters() const {}
|
|
#endif // defined HAVE_LIBPFM
|
|
|
|
PerfCountersMeasurement::PerfCountersMeasurement(
|
|
const std::vector<std::string>& counter_names)
|
|
: start_values_(counter_names.size()), end_values_(counter_names.size()) {
|
|
counters_ = PerfCounters::Create(counter_names);
|
|
}
|
|
|
|
PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
|
|
if (this != &other) {
|
|
CloseCounters();
|
|
|
|
counter_ids_ = std::move(other.counter_ids_);
|
|
leader_ids_ = std::move(other.leader_ids_);
|
|
counter_names_ = std::move(other.counter_names_);
|
|
}
|
|
return *this;
|
|
}
|
|
} // namespace internal
|
|
} // namespace benchmark
|