llvm-project/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
Aiden Grossman 1c3b15e9f5 [llvm-exegesis] Use LLVM Support to get thread ID
This patch switches from manually using the Linux syscall to get the
current thread ID to using the relevant LLVM Support libraries that
abstract over the low level system details.
2024-03-12 16:55:01 -07:00

702 lines
26 KiB
C++

//===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <cmath>
#include <memory>
#include <string>
#include "Assembler.h"
#include "BenchmarkRunner.h"
#include "Error.h"
#include "MCInstrDescView.h"
#include "MmapUtils.h"
#include "PerfHelper.h"
#include "SubprocessMemory.h"
#include "Target.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/CrashRecoveryContext.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SystemZ/zOSSupport.h"
#ifdef __linux__
#ifdef HAVE_LIBPFM
#include <perfmon/perf_event.h>
#endif
#include <sys/mman.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
#if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER)
#include <sys/rseq.h>
#if defined(RSEQ_SIG) && defined(SYS_rseq)
#define GLIBC_INITS_RSEQ
#endif
#endif
#endif // __linux__
namespace llvm {
namespace exegesis {
/// Constructs a benchmark runner.
///
/// Every argument is stored in the member of the same name (ValCounters is
/// stored in ValidationCounters), and a fresh ScratchSpace is allocated and
/// owned through Scratch.
BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode,
BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
ExecutionModeE ExecutionMode,
ArrayRef<ValidationEvent> ValCounters)
: State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector),
ExecutionMode(ExecutionMode), ValidationCounters(ValCounters),
Scratch(std::make_unique<ScratchSpace>()) {}
// Out-of-line defaulted destructor.
BenchmarkRunner::~BenchmarkRunner() = default;
void BenchmarkRunner::FunctionExecutor::accumulateCounterValues(
const SmallVectorImpl<int64_t> &NewValues,
SmallVectorImpl<int64_t> *Result) {
const size_t NumValues = std::max(NewValues.size(), Result->size());
if (NumValues > Result->size())
Result->resize(NumValues, 0);
for (size_t I = 0, End = NewValues.size(); I < End; ++I)
(*Result)[I] += NewValues[I];
}
/// Runs the benchmark once for each counter named in Counters and sums the
/// per-counter results element-wise.
///
/// \param Counters '+'-separated list of counter names; whitespace around each
///        name is trimmed before it is passed to runWithCounter.
/// \param ValidationCounters forwarded unchanged to runWithCounter.
/// \param ValidationCounterValues forwarded unchanged to runWithCounter.
/// \returns the accumulated counter values, or the first error reported by
///          runWithCounter.
Expected<SmallVector<int64_t, 4>>
BenchmarkRunner::FunctionExecutor::runAndSample(
    const char *Counters, ArrayRef<const char *> ValidationCounters,
    SmallVectorImpl<int64_t> &ValidationCounterValues) const {
  SmallVector<StringRef, 2> Names;
  StringRef(Counters).split(Names, '+');

  // Several counters may back a single ProcRes (e.g. P23 on SandyBridge); their
  // counts are summed.
  SmallVector<int64_t, 4> Totals;
  for (StringRef Name : Names) {
    Expected<SmallVector<int64_t, 4>> SampleOrErr = runWithCounter(
        Name.trim(), ValidationCounters, ValidationCounterValues);
    if (!SampleOrErr)
      return SampleOrErr.takeError();
    accumulateCounterValues(*SampleOrErr, &Totals);
  }
  return Totals;
}
namespace {
/// Function executor that runs the benchmarked function inside the
/// llvm-exegesis process itself. The call is wrapped in a CrashRecoveryContext;
/// when RunSafely reports a failure the run is returned as a SnippetSignal
/// error.
///
/// Counter accumulation is provided by the inherited
/// FunctionExecutor::accumulateCounterValues; this class only implements
/// runWithCounter.
class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
public:
  /// Creates an executor for the code in Obj.
  ///
  /// \returns the executor, or the error produced by
  ///          ExecutableFunction::create.
  static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>>
  create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
         BenchmarkRunner::ScratchSpace *Scratch) {
    Expected<ExecutableFunction> EF =
        ExecutableFunction::create(State.createTargetMachine(), std::move(Obj));
    if (!EF)
      return EF.takeError();
    return std::unique_ptr<InProcessFunctionExecutorImpl>(
        new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch));
  }

private:
  // Private: instances are only built through create().
  InProcessFunctionExecutorImpl(const LLVMState &State,
                                ExecutableFunction Function,
                                BenchmarkRunner::ScratchSpace *Scratch)
      : State(State), Function(std::move(Function)), Scratch(Scratch) {}

  /// Runs the function once while measuring CounterName.
  ///
  /// On success the validation counter readings are written to the leading
  /// elements of ValidationCounterValues (which must already be large enough)
  /// and the main counter reading is returned.
  Expected<SmallVector<int64_t, 4>> runWithCounter(
      StringRef CounterName, ArrayRef<const char *> ValidationCounters,
      SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
    const ExegesisTarget &ET = State.getExegesisTarget();
    char *const ScratchPtr = Scratch->ptr();
    auto CounterOrError =
        ET.createCounter(CounterName, State, ValidationCounters);
    if (!CounterOrError)
      return CounterOrError.takeError();
    pfm::CounterGroup *Counter = CounterOrError.get().get();
    Scratch->clear();
    {
      auto PS = ET.withSavedState();
      CrashRecoveryContext CRC;
      CrashRecoveryContext::Enable();
      const bool Crashed = !CRC.RunSafely([this, Counter, ScratchPtr]() {
        Counter->start();
        this->Function(ScratchPtr);
        Counter->stop();
      });
      CrashRecoveryContext::Disable();
      PS.reset();
      if (Crashed) {
#ifdef LLVM_ON_UNIX
        // See "Exit Status for Commands":
        // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
        constexpr const int kSigOffset = 128;
        return make_error<SnippetSignal>(CRC.RetCode - kSigOffset);
#else
        // The exit code of the process on windows is not meaningful as a
        // signal, so simply pass in -1 as the signal into the error.
        return make_error<SnippetSignal>(-1);
#endif // LLVM_ON_UNIX
      }
    }

    auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
    if (!ValidationValuesOrErr)
      return ValidationValuesOrErr.takeError();
    ArrayRef RealValidationValues = *ValidationValuesOrErr;
    for (size_t I = 0; I < RealValidationValues.size(); ++I)
      ValidationCounterValues[I] = RealValidationValues[I];

    return Counter->readOrError(Function.getFunctionBytes());
  }

  // Used to obtain the exegesis target that creates the counters.
  const LLVMState &State;
  // The assembled function that gets executed and measured.
  const ExecutableFunction Function;
  // Not owned. Cleared before every run and passed to the function.
  BenchmarkRunner::ScratchSpace *const Scratch;
};
#ifdef __linux__
// The following class implements a function executor that executes the
// benchmark code within a subprocess rather than within the main llvm-exegesis
// process. This allows for much more control over the execution context of the
// snippet, particularly with regard to memory. This class performs all the
// necessary functions to create the subprocess, execute the snippet in the
// subprocess, and report results/handle errors.
class SubProcessFunctionExecutorImpl
: public BenchmarkRunner::FunctionExecutor {
public:
/// Creates a subprocess executor for the code in Obj.
///
/// \returns the executor, or the error produced by ExecutableFunction::create.
static Expected<std::unique_ptr<SubProcessFunctionExecutorImpl>>
create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
       const BenchmarkKey &Key) {
  Expected<ExecutableFunction> FunctionOrErr =
      ExecutableFunction::create(State.createTargetMachine(), std::move(Obj));
  if (FunctionOrErr) {
    // The constructor is private, hence the explicit new.
    return std::unique_ptr<SubProcessFunctionExecutorImpl>(
        new SubProcessFunctionExecutorImpl(State, std::move(*FunctionOrErr),
                                           Key));
  }
  return FunctionOrErr.takeError();
}
private:
// Private constructor; instances are built through create().
// Function is taken by value and moved into the member. Key is bound to the
// Key reference member, so the caller's BenchmarkKey must outlive this object.
SubProcessFunctionExecutorImpl(const LLVMState &State,
ExecutableFunction Function,
const BenchmarkKey &Key)
: State(State), Function(std::move(Function)), Key(Key) {}
// Exit codes the benchmarking child process uses to tell the parent which
// setup step failed. Zero is not listed: the child exits with 0 on success.
enum ChildProcessExitCodeE {
  // Receiving the counter file descriptor over the socket failed.
  CounterFDReadFailed = 1,
  // Unregistering the rseq area failed.
  RSeqDisableFailed = 2,
  // Mapping memory for the snippet failed.
  FunctionDataMappingFailed = 3,
  // Setting up the auxiliary memory failed.
  AuxiliaryMemorySetupFailed = 4
};
// Maps an exit code of the benchmarking child process to a human-readable
// description. Codes that are not a ChildProcessExitCodeE value get a generic
// "unknown exit code" message.
StringRef childProcessExitCodeToString(int ExitCode) const {
  if (ExitCode == ChildProcessExitCodeE::CounterFDReadFailed)
    return "Counter file descriptor read failed";
  if (ExitCode == ChildProcessExitCodeE::RSeqDisableFailed)
    return "Disabling restartable sequences failed";
  if (ExitCode == ChildProcessExitCodeE::FunctionDataMappingFailed)
    return "Failed to map memory for assembled snippet";
  if (ExitCode == ChildProcessExitCodeE::AuxiliaryMemorySetupFailed)
    return "Failed to setup auxiliary memory";
  return "Child process returned with unknown exit code";
}
// Sends the file descriptor FD over SocketFD as SCM_RIGHTS ancillary data. The
// message carries no regular payload. Returns a Failure if sendmsg fails.
Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const {
  // Zeroed buffer with room for exactly one control message holding an int.
  char AncillaryData[CMSG_SPACE(sizeof(FD))];
  memset(AncillaryData, 0, sizeof(AncillaryData));

  struct msghdr Msg = {};
  Msg.msg_control = AncillaryData;
  // sizeof(AncillaryData) is CMSG_SPACE(sizeof(FD)) by construction.
  Msg.msg_controllen = sizeof(AncillaryData);

  struct cmsghdr *Header = CMSG_FIRSTHDR(&Msg);
  Header->cmsg_len = CMSG_LEN(sizeof(FD));
  Header->cmsg_level = SOL_SOCKET;
  Header->cmsg_type = SCM_RIGHTS;
  memcpy(CMSG_DATA(Header), &FD, sizeof(FD));

  if (sendmsg(SocketFD, &Msg, 0) < 0)
    return make_error<Failure>("Failed to write FD to socket: " +
                               Twine(strerror(errno)));
  return Error::success();
}
// Receives a file descriptor that the peer sent over SocketFD as SCM_RIGHTS
// ancillary data (see sendFileDescriptorThroughSocket).
//
// Returns the received descriptor, or a Failure when recvmsg fails or the
// message does not carry exactly one int-sized SCM_RIGHTS control message.
Expected<int> getFileDescriptorFromSocket(int SocketFD) const {
  struct msghdr Message = {};
  // The control buffer is accessed through struct cmsghdr, so it must be
  // aligned for that type; a plain char array gives no such guarantee.
  alignas(struct cmsghdr) char ControlBuffer[256];
  Message.msg_control = ControlBuffer;
  Message.msg_controllen = sizeof(ControlBuffer);

  ssize_t BytesRead = recvmsg(SocketFD, &Message, 0);
  if (BytesRead < 0)
    return make_error<Failure>("Failed to read FD from socket: " +
                               Twine(strerror(errno)));

  // CMSG_FIRSTHDR yields a null pointer when the message arrived without
  // (enough) ancillary data, so it must be checked before it is dereferenced.
  struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
  if (ControlMessage == nullptr || ControlMessage->cmsg_level != SOL_SOCKET ||
      ControlMessage->cmsg_type != SCM_RIGHTS)
    return make_error<Failure>(
        "Did not receive a file descriptor control message from socket.");

  int FD;
  if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD)))
    return make_error<Failure>("Failed to get correct number of bytes for "
                               "file descriptor from socket.");
  memcpy(&FD, CMSG_DATA(ControlMessage), sizeof(FD));
  return FD;
}
// Forks a child process, runs the snippet in it while measuring CounterName,
// and collects the results in the parent.
//
// On success CounterValues receives the counter reading and the leading
// elements of ValidationCounterValues receive the validation counter readings.
// Returns a Failure for setup problems or a non-zero child exit code, a
// SnippetSegmentationFault when the child stopped on SIGSEGV, and a
// SnippetSignal when it stopped on any other signal.
//
// Both socket ends are closed on every return path, and a child that has not
// terminated by itself is killed and reaped before returning.
Error createSubProcessAndRunBenchmark(
    StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues,
    ArrayRef<const char *> ValidationCounters,
    SmallVectorImpl<int64_t> &ValidationCounterValues) const {
  int PipeFiles[2];
  int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, 0, PipeFiles);
  if (PipeSuccessOrErr != 0) {
    return make_error<Failure>(
        "Failed to create a pipe for interprocess communication between "
        "llvm-exegesis and the benchmarking subprocess: " +
        Twine(strerror(errno)));
  }

  // Closes whichever socket ends are still open when the parent leaves this
  // function. An end that is closed manually is marked with -1. The child
  // never runs this destructor because it leaves through exit().
  struct SocketPairCloser {
    int (&FDs)[2];
    ~SocketPairCloser() {
      for (int &FD : FDs)
        if (FD != -1)
          close(FD);
    }
  } CloseSocketsOnExit{PipeFiles};

  SubprocessMemory SPMemory;
  Error MemoryInitError = SPMemory.initializeSubprocessMemory(getpid());
  if (MemoryInitError)
    return MemoryInitError;

  Error AddMemDefError =
      SPMemory.addMemoryDefinition(Key.MemoryValues, getpid());
  if (AddMemDefError)
    return AddMemDefError;

  long ParentTID = get_threadid();
  pid_t ParentOrChildPID = fork();

  if (ParentOrChildPID == -1) {
    return make_error<Failure>("Failed to create child process: " +
                               Twine(strerror(errno)));
  }

  if (ParentOrChildPID == 0) {
    // We are in the child process, close the write end of the pipe.
    close(PipeFiles[1]);
    // Unregister handlers, signal handling is now handled through ptrace in
    // the host process.
    sys::unregisterHandlers();
    prepareAndRunBenchmark(PipeFiles[0], Key, ParentTID);
    // The child process terminates in the above function, so we should never
    // get to this point.
    llvm_unreachable("Child process didn't exit when expected.");
  }

  // From here on a child exists. Unless it has already terminated and been
  // collected, kill and reap it when leaving this function so that neither a
  // stopped nor a blocked child outlives the benchmark run. It is declared
  // after SPMemory and is therefore destroyed (child killed) before the shared
  // memory is torn down.
  struct ChildProcessReaper {
    pid_t PID;
    bool AlreadyReaped;
    ~ChildProcessReaper() {
      if (AlreadyReaped)
        return;
      // SIGKILL rather than SIGTERM: the child has no handlers to run and a
      // traced process would only enter another signal-delivery-stop.
      kill(PID, SIGKILL);
      waitpid(PID, nullptr, 0);
    }
  } ChildReaper{ParentOrChildPID, false};

  const ExegesisTarget &ET = State.getExegesisTarget();
  auto CounterOrError = ET.createCounter(
      CounterName, State, ValidationCounters, ParentOrChildPID);

  if (!CounterOrError)
    return CounterOrError.takeError();

  pfm::CounterGroup *Counter = CounterOrError.get().get();

  // The parent only writes to the socket; drop the read end.
  close(PipeFiles[0]);
  PipeFiles[0] = -1;

  // Make sure to attach to the process (and wait for the sigstop to be
  // delivered and for the process to continue) before we write to the counter
  // file descriptor. Attaching to the process before writing to the socket
  // ensures that the subprocess at most has blocked on the read call. If we
  // attach afterwards, the subprocess might exit before we get to the attach
  // call due to effects like scheduler contention, introducing transient
  // failures.
  if (ptrace(PTRACE_ATTACH, ParentOrChildPID, NULL, NULL) != 0)
    return make_error<Failure>("Failed to attach to the child process: " +
                               Twine(strerror(errno)));

  // Wait for this specific child so that a status change of any other child
  // of the process cannot be mistaken for the attach stop.
  if (waitpid(ParentOrChildPID, nullptr, 0) == -1) {
    return make_error<Failure>(
        "Failed to wait for child process to stop after attaching: " +
        Twine(strerror(errno)));
  }

  if (ptrace(PTRACE_CONT, ParentOrChildPID, NULL, NULL) != 0)
    return make_error<Failure>(
        "Failed to continue execution of the child process: " +
        Twine(strerror(errno)));

  int CounterFileDescriptor = Counter->getFileDescriptor();
  Error SendError =
      sendFileDescriptorThroughSocket(PipeFiles[1], CounterFileDescriptor);

  if (SendError)
    return SendError;

  int ChildStatus;
  if (waitpid(ParentOrChildPID, &ChildStatus, 0) == -1) {
    return make_error<Failure>(
        "Waiting for the child process to complete failed: " +
        Twine(strerror(errno)));
  }

  // A child that exited or was terminated has been collected by the waitpid
  // above; it must not be signalled again.
  ChildReaper.AlreadyReaped =
      WIFEXITED(ChildStatus) || WIFSIGNALED(ChildStatus);

  if (WIFEXITED(ChildStatus)) {
    int ChildExitCode = WEXITSTATUS(ChildStatus);
    if (ChildExitCode == 0) {
      // The child exited successfully, read counter values and return
      // success.
      auto CounterValueOrErr = Counter->readOrError();
      if (!CounterValueOrErr)
        return CounterValueOrErr.takeError();
      CounterValues = std::move(*CounterValueOrErr);

      auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
      if (!ValidationValuesOrErr)
        return ValidationValuesOrErr.takeError();

      ArrayRef RealValidationValues = *ValidationValuesOrErr;
      for (size_t I = 0; I < RealValidationValues.size(); ++I)
        ValidationCounterValues[I] = RealValidationValues[I];

      return Error::success();
    }
    // The child exited, but not successfully.
    return make_error<Failure>(
        "Child benchmarking process exited with non-zero exit code: " +
        childProcessExitCodeToString(ChildExitCode));
  }

  // An error was encountered running the snippet, process it. The stopped
  // child is killed and reaped by ChildReaper once the error has been built.
  siginfo_t ChildSignalInfo;
  if (ptrace(PTRACE_GETSIGINFO, ParentOrChildPID, NULL, &ChildSignalInfo) ==
      -1) {
    return make_error<Failure>("Getting signal info from the child failed: " +
                               Twine(strerror(errno)));
  }

  if (ChildSignalInfo.si_signo == SIGSEGV)
    return make_error<SnippetSegmentationFault>(
        reinterpret_cast<intptr_t>(ChildSignalInfo.si_addr));

  return make_error<SnippetSignal>(ChildSignalInfo.si_signo);
}
// Sets the core file size limit of the calling process to zero so that a
// crashing snippet does not produce a core dump. Failure of setrlimit is
// deliberately ignored (best effort).
void disableCoreDumps() const {
  // Both fields must be initialised: setrlimit reads rlim_max as well, and an
  // indeterminate hard limit can make the call fail or set an arbitrary
  // limit. Lowering the hard limit to zero is always permitted.
  struct rlimit CoreLimit = {};
  CoreLimit.rlim_cur = 0;
  CoreLimit.rlim_max = 0;
  setrlimit(RLIMIT_CORE, &CoreLimit);
}
// Runs inside the benchmarking child process and never returns.
//
// Receives the counter file descriptor over Pipe, copies the assembled snippet
// into a fresh mapping (at Key.SnippetAddress when that is non-zero), makes it
// executable, sets up the auxiliary memory and calls the snippet. Exits with 0
// when the snippet returns, or with a ChildProcessExitCodeE value when a setup
// step fails.
[[noreturn]] void prepareAndRunBenchmark(int Pipe, const BenchmarkKey &Key,
                                         long ParentTID) const {
  // Disable core dumps in the child process as otherwise every time we
  // encounter an execution failure like a segmentation fault, we will create
  // a core dump. We report the information directly rather than require the
  // user inspect a core dump.
  disableCoreDumps();

  // The following occurs within the benchmarking subprocess.
  pid_t ParentPID = getppid();

  Expected<int> CounterFileDescriptorOrError =
      getFileDescriptorFromSocket(Pipe);

  if (!CounterFileDescriptorOrError)
    exit(ChildProcessExitCodeE::CounterFDReadFailed);

  int CounterFileDescriptor = *CounterFileDescriptorOrError;

  // Glibc versions greater than 2.35 automatically call rseq during
  // initialization. Unmapping the region that glibc sets up for this causes
  // segfaults in the program. Unregister the rseq region so that we can safely
  // unmap it later
#ifdef GLIBC_INITS_RSEQ
  long RseqDisableOutput =
      syscall(SYS_rseq, (intptr_t)__builtin_thread_pointer() + __rseq_offset,
              __rseq_size, RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
  if (RseqDisableOutput != 0)
    exit(ChildProcessExitCodeE::RSeqDisableFailed);
#endif // GLIBC_INITS_RSEQ

  // The frontend that generates the memory annotation structures should
  // validate that the address to map the snippet in at is a multiple of
  // the page size. Assert that this is true here.
  assert(Key.SnippetAddress % getpagesize() == 0 &&
         "The snippet address needs to be aligned to a page boundary.");

  size_t FunctionDataCopySize = this->Function.FunctionBytes.size();
  void *MapAddress = NULL;
  int MapFlags = MAP_PRIVATE | MAP_ANONYMOUS;

  if (Key.SnippetAddress != 0) {
    MapAddress = reinterpret_cast<void *>(Key.SnippetAddress);
    MapFlags |= MAP_FIXED_NOREPLACE;
  }

  // Anonymous mappings take no file; -1 is the portable file descriptor value.
  void *Mapping = mmap(MapAddress, FunctionDataCopySize,
                       PROT_READ | PROT_WRITE, MapFlags, -1, 0);
  if (Mapping == MAP_FAILED)
    exit(ChildProcessExitCodeE::FunctionDataMappingFailed);
  char *FunctionDataCopy = static_cast<char *>(Mapping);

  memcpy(FunctionDataCopy, this->Function.FunctionBytes.data(),
         this->Function.FunctionBytes.size());
  // If the mapping cannot be made executable, calling into it would fault and
  // be reported as a crash of the snippet itself. Report a setup failure
  // instead.
  if (mprotect(FunctionDataCopy, FunctionDataCopySize,
               PROT_READ | PROT_EXEC) != 0)
    exit(ChildProcessExitCodeE::FunctionDataMappingFailed);

  Expected<int> AuxMemFDOrError =
      SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
          Key.MemoryValues, ParentPID, ParentTID, CounterFileDescriptor);
  if (!AuxMemFDOrError)
    exit(ChildProcessExitCodeE::AuxiliaryMemorySetupFailed);

  // Call the copied snippet, passing the size of the mapping and the
  // auxiliary memory file descriptor.
  ((void (*)(size_t, int))(intptr_t)FunctionDataCopy)(FunctionDataCopySize,
                                                      *AuxMemFDOrError);

  exit(0);
}
// Runs the snippet in a subprocess while measuring CounterName. Returns the
// counter values filled in by createSubProcessAndRunBenchmark, or the error it
// reported.
Expected<SmallVector<int64_t, 4>> runWithCounter(
    StringRef CounterName, ArrayRef<const char *> ValidationCounters,
    SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
  // Starts out as a single zero; overwritten on a successful run.
  SmallVector<int64_t, 4> Samples(1, 0);
  if (Error RunError = createSubProcessAndRunBenchmark(
          CounterName, Samples, ValidationCounters, ValidationCounterValues))
    return std::move(RunError);
  return Samples;
}
// Used to obtain the exegesis target that creates the performance counters.
const LLVMState &State;
// The assembled snippet; its bytes are copied into a fresh mapping in the
// child process before being executed.
const ExecutableFunction Function;
// Held by reference: the BenchmarkKey passed to create() must outlive this
// executor.
const BenchmarkKey &Key;
};
#endif // __linux__
} // namespace
/// Assembles the instructions of BC, repeated by Repetitor, into an in-memory
/// buffer.
///
/// \returns the bytes written by assembleToStream, or the error it reported.
Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
    const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
    unsigned MinInstructions, unsigned LoopBodySize,
    bool GenerateMemoryInstructions) const {
  SmallString<0> ObjectCode;
  raw_svector_ostream ObjectStream(ObjectCode);

  Error AssembleError = assembleToStream(
      State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns,
      Repetitor.Repeat(BC.Key.Instructions, MinInstructions, LoopBodySize,
                       GenerateMemoryInstructions),
      ObjectStream, BC.Key, GenerateMemoryInstructions);
  if (AssembleError)
    return std::move(AssembleError);

  return ObjectCode;
}
/// Prepares everything needed to run the benchmark described by BC.
///
/// Fills in the benchmark metadata and, depending on BenchmarkPhaseSelector,
/// (1) assembles a short version of the snippet whose function bytes are stored
/// in AssembledSnippet and (2) assembles the full snippet into RC.ObjectFile.
/// Returns the first assembly error encountered.
Expected<BenchmarkRunner::RunnableConfiguration>
BenchmarkRunner::getRunnableConfiguration(
    const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize,
    const SnippetRepetitor &Repetitor) const {
  // Memory instructions are only generated for the subprocess execution mode.
  const bool UseMemoryInstructions =
      ExecutionMode == ExecutionModeE::SubProcess;
  const std::vector<MCInst> &SnippetInstructions = BC.Key.Instructions;

  RunnableConfiguration RC;
  Benchmark &Result = RC.BenchmarkResult;
  Result.Mode = Mode;
  Result.CpuName = std::string(State.getTargetMachine().getTargetCPU());
  Result.LLVMTriple = State.getTargetMachine().getTargetTriple().normalize();
  Result.MinInstructions = MinInstructions;
  Result.Info = BC.Info;
  Result.Key = BC.Key;

  // For debug/analysis, assemble a version that repeats the snippet a few
  // times (four times the snippet length, with a loop body of twice its
  // length) so that the user clearly sees that the instructions are repeated.
  if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) {
    const int DisplayMinInstructions = 4 * SnippetInstructions.size();
    const int DisplayLoopBodySize = 2 * SnippetInstructions.size();
    auto DisplayCode =
        assembleSnippet(BC, Repetitor, DisplayMinInstructions,
                        DisplayLoopBodySize, UseMemoryInstructions);
    if (!DisplayCode)
      return DisplayCode.takeError();
    if (auto BytesError =
            getBenchmarkFunctionBytes(*DisplayCode, Result.AssembledSnippet))
      return std::move(BytesError);
  }

  // Assemble enough repetitions of the snippet so we have at least
  // MinInstructions instructions.
  if (BenchmarkPhaseSelector >
      BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
    auto MeasuredCode =
        assembleSnippet(BC, Repetitor, Result.MinInstructions, LoopBodySize,
                        UseMemoryInstructions);
    if (!MeasuredCode)
      return MeasuredCode.takeError();
    RC.ObjectFile = getObjectFromBuffer(*MeasuredCode);
  }

  return std::move(RC);
}
/// Creates the executor that matches ExecutionMode for the given object file.
///
/// The subprocess mode is only available on Linux; on other platforms
/// requesting it yields a Failure. Errors from the executors' create()
/// functions are passed through.
Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
BenchmarkRunner::createFunctionExecutor(
    object::OwningBinary<object::ObjectFile> ObjectFile,
    const BenchmarkKey &Key) const {
  if (ExecutionMode == ExecutionModeE::InProcess) {
    auto ExecutorOrErr = InProcessFunctionExecutorImpl::create(
        State, std::move(ObjectFile), Scratch.get());
    if (ExecutorOrErr)
      return std::move(*ExecutorOrErr);
    return ExecutorOrErr.takeError();
  }

  if (ExecutionMode == ExecutionModeE::SubProcess) {
#ifdef __linux__
    auto ExecutorOrErr = SubProcessFunctionExecutorImpl::create(
        State, std::move(ObjectFile), Key);
    if (ExecutorOrErr)
      return std::move(*ExecutorOrErr);
    return ExecutorOrErr.takeError();
#else
    return make_error<Failure>(
        "The subprocess execution mode is only supported on Linux");
#endif
  }

  llvm_unreachable("ExecutionMode is outside expected range");
}
/// Runs a configuration produced by getRunnableConfiguration.
///
/// Optionally writes the object file to DumpFile, then (unless measuring is
/// skipped by BenchmarkPhaseSelector) executes the snippet and stores the
/// scaled measurements in the benchmark result.
///
/// \returns a pair of the error state and the benchmark result; the result is
///          moved out of RC on every path.
std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration(
    RunnableConfiguration &&RC,
    const std::optional<StringRef> &DumpFile) const {
  Benchmark &Result = RC.BenchmarkResult;
  object::OwningBinary<object::ObjectFile> &Object = RC.ObjectFile;

  // An object file only exists once the snippet has been fully assembled.
  const bool HaveObjectFile =
      BenchmarkPhaseSelector >
      BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet;
  if (DumpFile && HaveObjectFile) {
    auto DumpPath = writeObjectFile(Object.getBinary()->getData(), *DumpFile);
    if (!DumpPath)
      return {DumpPath.takeError(), std::move(Result)};
    outs() << "Check generated assembly with: /usr/bin/objdump -d "
           << *DumpPath << "\n";
  }

  if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) {
    Result.Error = "actual measurements skipped.";
    return {Error::success(), std::move(Result)};
  }

  Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor =
      createFunctionExecutor(std::move(Object), RC.BenchmarkResult.Key);
  if (!Executor)
    return {Executor.takeError(), std::move(Result)};

  auto Measurements = runMeasurements(**Executor);
  if (Error MeasureError = Measurements.takeError())
    return {std::move(MeasureError), std::move(Result)};

  assert(Result.MinInstructions > 0 && "invalid MinInstructions");
  // Number of times the entire snippet is repeated.
  const auto SnippetRepetitions =
      std::ceil(Result.MinInstructions /
                static_cast<double>(Result.Key.Instructions.size()));
  for (BenchmarkMeasure &Measure : *Measurements) {
    // Scale the measurements by the number of instructions.
    Measure.PerInstructionValue /= Result.MinInstructions;
    // Scale the measurements by the number of snippet repetitions.
    Measure.PerSnippetValue /= SnippetRepetitions;
  }
  Result.Measurements = std::move(*Measurements);

  return {Error::success(), std::move(Result)};
}
/// Writes Buffer to FileName, or to a newly created temporary "snippet-*.o"
/// file when FileName is empty.
///
/// \returns the path of the written file, or an error when the file cannot be
///          created/opened or when writing to it fails.
Expected<std::string>
BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const {
  int ResultFD = 0;
  SmallString<256> ResultPath = FileName;
  if (Error E = errorCodeToError(
          FileName.empty() ? sys::fs::createTemporaryFile("snippet", "o",
                                                          ResultFD, ResultPath)
                           : sys::fs::openFileForReadWrite(
                                 FileName, ResultFD, sys::fs::CD_CreateAlways,
                                 sys::fs::OF_None)))
    return std::move(E);
  raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/);
  OFS.write(Buffer.data(), Buffer.size());
  OFS.flush();
  // A raw_fd_ostream that is destroyed with an unhandled error aborts the
  // process. Turn a failed write (e.g. a full disk) into a regular Error and
  // clear the stream's error state before it goes out of scope.
  if (OFS.has_error()) {
    Error WriteError = errorCodeToError(OFS.error());
    OFS.clear_error();
    return std::move(WriteError);
  }
  return std::string(ResultPath);
}
// Comparator for lower_bound over (event, counter name) pairs: orders an entry
// before the event RHS when the entry's event has the smaller integer value.
static bool EventLessThan(const std::pair<ValidationEvent, const char *> LHS,
                          const ValidationEvent RHS) {
  const int EntryOrdinal = static_cast<int>(LHS.first);
  const int WantedOrdinal = static_cast<int>(RHS);
  return EntryOrdinal < WantedOrdinal;
}
/// Appends to ValCountersToRun the target counter name for every requested
/// validation event, in request order.
///
/// \returns a Failure as soon as a requested event has no entry in the
///          target's validation event table; names appended before that point
///          remain in ValCountersToRun.
Error BenchmarkRunner::getValidationCountersToRun(
    SmallVector<const char *> &ValCountersToRun) const {
  const PfmCountersInfo &PCI = State.getPfmCounters();
  // Reserve room for the new entries on top of whatever is already stored.
  ValCountersToRun.reserve(ValCountersToRun.size() +
                           ValidationCounters.size());

  ArrayRef TargetValidationEvents(PCI.ValidationEvents,
                                  PCI.NumValidationEvents);
  for (const ValidationEvent RequestedValEvent : ValidationCounters) {
    // The binary search relies on the target's table being sorted by event;
    // on an unsorted table an existing event may not be found.
    auto ValCounterIt =
        lower_bound(TargetValidationEvents, RequestedValEvent, EventLessThan);
    if (ValCounterIt == TargetValidationEvents.end() ||
        ValCounterIt->first != RequestedValEvent)
      return make_error<Failure>("Cannot create validation counter");

    ValCountersToRun.push_back(ValCounterIt->second);
  }

  return Error::success();
}
// Out-of-line defaulted destructor, matching BenchmarkRunner's.
BenchmarkRunner::FunctionExecutor::~FunctionExecutor() = default;
} // namespace exegesis
} // namespace llvm