
MisExpect diagnostics should not prevent compilation from succeeding, and the assertion is insufficient to prevent division by zero in release builds. This patch addresses that by replacing the assert with an early return. Additionally, it disables MisExpect diagnostics when using sample profiling, since this is the only known case where this error has manifested. Reviewed By: tejohnson Differential Revision: https://reviews.llvm.org/D124302
250 lines
9.3 KiB
C++
250 lines
9.3 KiB
C++
//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This contains code to emit warnings for potentially incorrect usage of the
|
|
// llvm.expect intrinsic. This utility extracts the threshold values from
|
|
// metadata associated with the instrumented Branch or Switch instruction. The
|
|
// threshold values are then used to determine if a warning should be emmited.
|
|
//
|
|
// MisExpect's implementation relies on two assumptions about how branch weights
|
|
// are managed in LLVM.
|
|
//
|
|
// 1) Frontend profiling weights are always in place before llvm.expect is
|
|
// lowered in LowerExpectIntrinsic.cpp. Frontend based instrumentation therefore
|
|
// needs to extract the branch weights and then compare them to the weights
|
|
// being added by the llvm.expect intrinsic lowering.
|
|
//
|
|
// 2) Sampling and IR based profiles will *only* have branch weight metadata
|
|
// before profiling data is consulted if they are from a lowered llvm.expect
|
|
// intrinsic. These profiles thus always extract the expected weights and then
|
|
// compare them to the weights collected during profiling to determine if a
|
|
// diagnostic message is warranted.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Transforms/Utils/MisExpect.h"
|
|
#include "llvm/ADT/Twine.h"
|
|
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
|
#include "llvm/IR/Constants.h"
|
|
#include "llvm/IR/DiagnosticInfo.h"
|
|
#include "llvm/IR/Instruction.h"
|
|
#include "llvm/IR/Instructions.h"
|
|
#include "llvm/IR/LLVMContext.h"
|
|
#include "llvm/Support/BranchProbability.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/FormatVariadic.h"
|
|
#include <cstdint>
|
|
#include <functional>
|
|
#include <numeric>
|
|
|
|
#define DEBUG_TYPE "misexpect"
|
|
|
|
using namespace llvm;
|
|
using namespace misexpect;
|
|
|
|
namespace llvm {
|
|
|
|
// Command line option to enable/disable the warning when profile data suggests
|
|
// a mismatch with the use of the llvm.expect intrinsic
|
|
static cl::opt<bool> PGOWarnMisExpect(
|
|
"pgo-warn-misexpect", cl::init(false), cl::Hidden,
|
|
cl::desc("Use this option to turn on/off "
|
|
"warnings about incorrect usage of llvm.expect intrinsics."));
|
|
|
|
static cl::opt<unsigned> MisExpectTolerance(
|
|
"misexpect-tolerance", cl::init(0),
|
|
cl::desc("Prevents emiting diagnostics when profile counts are "
|
|
"within N% of the threshold.."));
|
|
|
|
} // namespace llvm
|
|
|
|
namespace {
|
|
|
|
bool isMisExpectDiagEnabled(LLVMContext &Ctx) {
|
|
return PGOWarnMisExpect || Ctx.getMisExpectWarningRequested();
|
|
}
|
|
|
|
uint64_t getMisExpectTolerance(LLVMContext &Ctx) {
|
|
return std::max(static_cast<uint64_t>(MisExpectTolerance),
|
|
Ctx.getDiagnosticsMisExpectTolerance());
|
|
}
|
|
|
|
Instruction *getInstCondition(Instruction *I) {
|
|
assert(I != nullptr && "MisExpect target Instruction cannot be nullptr");
|
|
Instruction *Ret = nullptr;
|
|
if (auto *B = dyn_cast<BranchInst>(I)) {
|
|
Ret = dyn_cast<Instruction>(B->getCondition());
|
|
}
|
|
// TODO: Find a way to resolve condition location for switches
|
|
// Using the condition of the switch seems to often resolve to an earlier
|
|
// point in the program, i.e. the calculation of the switch condition, rather
|
|
// than the switch's location in the source code. Thus, we should use the
|
|
// instruction to get source code locations rather than the condition to
|
|
// improve diagnostic output, such as the caret. If the same problem exists
|
|
// for branch instructions, then we should remove this function and directly
|
|
// use the instruction
|
|
//
|
|
else if (auto *S = dyn_cast<SwitchInst>(I)) {
|
|
Ret = dyn_cast<Instruction>(S->getCondition());
|
|
}
|
|
return Ret ? Ret : I;
|
|
}
|
|
|
|
void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx,
|
|
uint64_t ProfCount, uint64_t TotalCount) {
|
|
double PercentageCorrect = (double)ProfCount / TotalCount;
|
|
auto PerString =
|
|
formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount);
|
|
auto RemStr = formatv(
|
|
"Potential performance regression from use of the llvm.expect intrinsic: "
|
|
"Annotation was correct on {0} of profiled executions.",
|
|
PerString);
|
|
Twine Msg(PerString);
|
|
Instruction *Cond = getInstCondition(I);
|
|
if (isMisExpectDiagEnabled(Ctx))
|
|
Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg));
|
|
OptimizationRemarkEmitter ORE(I->getParent()->getParent());
|
|
ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str());
|
|
}
|
|
|
|
} // namespace
|
|
|
|
namespace llvm {
|
|
namespace misexpect {
|
|
|
|
// Helper function to extract branch weights into a vector
|
|
Optional<SmallVector<uint32_t, 4>> extractWeights(Instruction *I,
|
|
LLVMContext &Ctx) {
|
|
assert(I && "MisExpect::extractWeights given invalid pointer");
|
|
|
|
auto *ProfileData = I->getMetadata(LLVMContext::MD_prof);
|
|
if (!ProfileData)
|
|
return None;
|
|
|
|
unsigned NOps = ProfileData->getNumOperands();
|
|
if (NOps < 3)
|
|
return None;
|
|
|
|
auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
|
|
if (!ProfDataName || !ProfDataName->getString().equals("branch_weights"))
|
|
return None;
|
|
|
|
SmallVector<uint32_t, 4> Weights(NOps - 1);
|
|
for (unsigned Idx = 1; Idx < NOps; Idx++) {
|
|
ConstantInt *Value =
|
|
mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(Idx));
|
|
uint32_t V = Value->getZExtValue();
|
|
Weights[Idx - 1] = V;
|
|
}
|
|
|
|
return Weights;
|
|
}
|
|
|
|
// TODO: when clang allows c++17, use std::clamp instead
|
|
uint32_t clamp(uint64_t value, uint32_t low, uint32_t hi) {
|
|
if (value > hi)
|
|
return hi;
|
|
if (value < low)
|
|
return low;
|
|
return value;
|
|
}
|
|
|
|
void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights,
|
|
ArrayRef<uint32_t> ExpectedWeights) {
|
|
// To determine if we emit a diagnostic, we need to compare the branch weights
|
|
// from the profile to those added by the llvm.expect intrinsic.
|
|
// So first, we extract the "likely" and "unlikely" weights from
|
|
// ExpectedWeights And determine the correct weight in the profile to compare
|
|
// against.
|
|
uint64_t LikelyBranchWeight = 0,
|
|
UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max();
|
|
size_t MaxIndex = 0;
|
|
for (size_t Idx = 0, End = ExpectedWeights.size(); Idx < End; Idx++) {
|
|
uint32_t V = ExpectedWeights[Idx];
|
|
if (LikelyBranchWeight < V) {
|
|
LikelyBranchWeight = V;
|
|
MaxIndex = Idx;
|
|
}
|
|
if (UnlikelyBranchWeight > V) {
|
|
UnlikelyBranchWeight = V;
|
|
}
|
|
}
|
|
|
|
const uint64_t ProfiledWeight = RealWeights[MaxIndex];
|
|
const uint64_t RealWeightsTotal =
|
|
std::accumulate(RealWeights.begin(), RealWeights.end(), (uint64_t)0,
|
|
std::plus<uint64_t>());
|
|
const uint64_t NumUnlikelyTargets = RealWeights.size() - 1;
|
|
|
|
uint64_t TotalBranchWeight =
|
|
LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets);
|
|
|
|
// FIXME: When we've addressed sample profiling, restore the assertion
|
|
//
|
|
// We cannot calculate branch probability if either of these invariants aren't
|
|
// met. However, MisExpect diagnostics should not prevent code from compiling,
|
|
// so we simply forgo emitting diagnostics here, and return early.
|
|
if ((TotalBranchWeight == 0) || (TotalBranchWeight <= LikelyBranchWeight))
|
|
return;
|
|
|
|
// To determine our threshold value we need to obtain the branch probability
|
|
// for the weights added by llvm.expect and use that proportion to calculate
|
|
// our threshold based on the collected profile data.
|
|
auto LikelyProbablilty = BranchProbability::getBranchProbability(
|
|
LikelyBranchWeight, TotalBranchWeight);
|
|
|
|
uint64_t ScaledThreshold = LikelyProbablilty.scale(RealWeightsTotal);
|
|
|
|
// clamp tolerance range to [0, 100)
|
|
auto Tolerance = getMisExpectTolerance(I.getContext());
|
|
Tolerance = clamp(Tolerance, 0, 99);
|
|
|
|
// Allow users to relax checking by N% i.e., if they use a 5% tolerance,
|
|
// then we check against 0.95*ScaledThreshold
|
|
if (Tolerance > 0)
|
|
ScaledThreshold *= (1.0 - Tolerance / 100.0);
|
|
|
|
// When the profile weight is below the threshold, we emit the diagnostic
|
|
if (ProfiledWeight < ScaledThreshold)
|
|
emitMisexpectDiagnostic(&I, I.getContext(), ProfiledWeight,
|
|
RealWeightsTotal);
|
|
}
|
|
|
|
void checkBackendInstrumentation(Instruction &I,
|
|
const ArrayRef<uint32_t> RealWeights) {
|
|
auto ExpectedWeightsOpt = extractWeights(&I, I.getContext());
|
|
if (!ExpectedWeightsOpt.hasValue())
|
|
return;
|
|
auto ExpectedWeights = ExpectedWeightsOpt.getValue();
|
|
verifyMisExpect(I, RealWeights, ExpectedWeights);
|
|
}
|
|
|
|
void checkFrontendInstrumentation(Instruction &I,
|
|
const ArrayRef<uint32_t> ExpectedWeights) {
|
|
auto RealWeightsOpt = extractWeights(&I, I.getContext());
|
|
if (!RealWeightsOpt.hasValue())
|
|
return;
|
|
auto RealWeights = RealWeightsOpt.getValue();
|
|
verifyMisExpect(I, RealWeights, ExpectedWeights);
|
|
}
|
|
|
|
void checkExpectAnnotations(Instruction &I,
|
|
const ArrayRef<uint32_t> ExistingWeights,
|
|
bool IsFrontendInstr) {
|
|
if (IsFrontendInstr) {
|
|
checkFrontendInstrumentation(I, ExistingWeights);
|
|
} else {
|
|
checkBackendInstrumentation(I, ExistingWeights);
|
|
}
|
|
}
|
|
|
|
} // namespace misexpect
|
|
} // namespace llvm
|
|
#undef DEBUG_TYPE
|