llvm-project/bolt/lib/Passes/ReorderFunctions.cpp
Vladislav Khmelevsky fd9604952d [BOLT] Set valid index for functions with profiles
Some of the passes that calculates tentative layout like LongJmp and
Golang are expecting that only functions with valid index will be
located in hot text section. But currently functions with valid profiles
and not set index are breaking this logic, to fix this we can move the
hasValidProfile() condition from AssignSections pass to ReorderFunctions.

Vladislav Khmelevsky,
Advanced Software Technology Lab, Huawei

Differential Revision: https://reviews.llvm.org/D127223
2022-06-08 14:13:12 +03:00

474 lines
16 KiB
C++

//===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements ReorderFunctions class.
//
//===----------------------------------------------------------------------===//
#include "bolt/Passes/ReorderFunctions.h"
#include "bolt/Passes/HFSort.h"
#include "llvm/Support/CommandLine.h"
#include <fstream>
#define DEBUG_TYPE "hfsort"
using namespace llvm;
namespace opts {
extern cl::OptionCategory BoltOptCategory;
extern cl::opt<unsigned> Verbosity;
extern cl::opt<uint32_t> RandomSeed;
extern size_t padFunction(const bolt::BinaryFunction &Function);
cl::opt<bolt::ReorderFunctions::ReorderType>
ReorderFunctions("reorder-functions",
cl::desc("reorder and cluster functions (works only with relocations)"),
cl::init(bolt::ReorderFunctions::RT_NONE),
cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE,
"none",
"do not reorder functions"),
clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT,
"exec-count",
"order by execution count"),
clEnumValN(bolt::ReorderFunctions::RT_HFSORT,
"hfsort",
"use hfsort algorithm"),
clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS,
"hfsort+",
"use hfsort+ algorithm"),
clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
"pettis-hansen",
"use Pettis-Hansen algorithm"),
clEnumValN(bolt::ReorderFunctions::RT_RANDOM,
"random",
"reorder functions randomly"),
clEnumValN(bolt::ReorderFunctions::RT_USER,
"user",
"use function order specified by -function-order")),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
static cl::opt<bool> ReorderFunctionsUseHotSize(
"reorder-functions-use-hot-size",
cl::desc("use a function's hot size when doing clustering"), cl::init(true),
cl::cat(BoltOptCategory));
static cl::opt<std::string>
FunctionOrderFile("function-order",
cl::desc("file containing an ordered list of functions to use for function "
"reordering"),
cl::cat(BoltOptCategory));
static cl::opt<std::string>
GenerateFunctionOrderFile("generate-function-order",
cl::desc("file to dump the ordered list of functions to use for function "
"reordering"),
cl::cat(BoltOptCategory));
static cl::opt<std::string>
LinkSectionsFile("generate-link-sections",
cl::desc("generate a list of function sections in a format suitable for "
"inclusion in a linker script"),
cl::cat(BoltOptCategory));
static cl::opt<bool>
UseEdgeCounts("use-edge-counts",
cl::desc("use edge count data when doing clustering"),
cl::init(true), cl::cat(BoltOptCategory));
static cl::opt<bool>
CgFromPerfData("cg-from-perf-data",
cl::desc("use perf data directly when constructing the call graph"
" for stale functions"),
cl::init(true),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
static cl::opt<bool> CgIgnoreRecursiveCalls(
"cg-ignore-recursive-calls",
cl::desc("ignore recursive calls when constructing the call graph"),
cl::init(true), cl::cat(BoltOptCategory));
static cl::opt<bool>
CgUseSplitHotSize("cg-use-split-hot-size",
cl::desc("use hot/cold data on basic blocks to determine hot sizes for "
"call graph functions"),
cl::init(false),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
} // namespace opts
namespace llvm {
namespace bolt {
using NodeId = CallGraph::NodeId;
using Arc = CallGraph::Arc;
using Node = CallGraph::Node;
void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters,
std::map<uint64_t, BinaryFunction> &BFs) {
std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats
uint64_t TotalSize = 0;
uint32_t Index = 0;
// Set order of hot functions based on clusters.
for (const Cluster &Cluster : Clusters) {
for (const NodeId FuncId : Cluster.targets()) {
Cg.nodeIdToFunc(FuncId)->setIndex(Index++);
FuncAddr[FuncId] = TotalSize;
TotalSize += Cg.size(FuncId);
}
}
// Assign valid index for functions with valid profile.
for (auto &It : BFs) {
BinaryFunction &BF = It.second;
if (!BF.hasValidIndex() && BF.hasValidProfile())
BF.setIndex(Index++);
}
if (opts::ReorderFunctions == RT_NONE)
return;
if (opts::Verbosity == 0) {
#ifndef NDEBUG
if (!DebugFlag || !isCurrentDebugType("hfsort"))
return;
#else
return;
#endif
}
bool PrintDetailed = opts::Verbosity > 1;
#ifndef NDEBUG
PrintDetailed |=
(DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0);
#endif
TotalSize = 0;
uint64_t CurPage = 0;
uint64_t Hotfuncs = 0;
double TotalDistance = 0;
double TotalCalls = 0;
double TotalCalls64B = 0;
double TotalCalls4KB = 0;
double TotalCalls2MB = 0;
if (PrintDetailed)
outs() << "BOLT-INFO: Function reordering page layout\n"
<< "BOLT-INFO: ============== page 0 ==============\n";
for (Cluster &Cluster : Clusters) {
if (PrintDetailed)
outs() << format(
"BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n",
Cluster.density(), Cluster.samples(), Cluster.size());
for (NodeId FuncId : Cluster.targets()) {
if (Cg.samples(FuncId) > 0) {
Hotfuncs++;
if (PrintDetailed)
outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId) << " ("
<< Cg.size(FuncId) << ")\n";
uint64_t Dist = 0;
uint64_t Calls = 0;
for (NodeId Dst : Cg.successors(FuncId)) {
if (FuncId == Dst) // ignore recursive calls in stats
continue;
const Arc &Arc = *Cg.findArc(FuncId, Dst);
const auto D = std::abs(FuncAddr[Arc.dst()] -
(FuncAddr[FuncId] + Arc.avgCallOffset()));
const double W = Arc.weight();
if (D < 64 && PrintDetailed && opts::Verbosity > 2)
outs() << "BOLT-INFO: short (" << D << "B) call:\n"
<< "BOLT-INFO: Src: " << *Cg.nodeIdToFunc(FuncId) << "\n"
<< "BOLT-INFO: Dst: " << *Cg.nodeIdToFunc(Dst) << "\n"
<< "BOLT-INFO: Weight = " << W << "\n"
<< "BOLT-INFO: AvgOffset = " << Arc.avgCallOffset() << "\n";
Calls += W;
if (D < 64) TotalCalls64B += W;
if (D < 4096) TotalCalls4KB += W;
if (D < (2 << 20)) TotalCalls2MB += W;
Dist += Arc.weight() * D;
if (PrintDetailed)
outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
"weight = %.0lf, callDist = %f\n",
Arc.src(),
FuncAddr[Arc.src()],
Arc.avgCallOffset(),
Arc.dst(),
FuncAddr[Arc.dst()],
Arc.weight(), D);
}
TotalCalls += Calls;
TotalDistance += Dist;
TotalSize += Cg.size(FuncId);
if (PrintDetailed) {
outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ",
TotalSize, Calls ? Dist / Calls : 0)
<< Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n';
const uint64_t NewPage = TotalSize / HugePageSize;
if (NewPage != CurPage) {
CurPage = NewPage;
outs() << format(
"BOLT-INFO: ============== page %u ==============\n", CurPage);
}
}
}
}
}
outs() << "BOLT-INFO: Function reordering stats\n"
<< format("BOLT-INFO: Number of hot functions: %u\n"
"BOLT-INFO: Number of clusters: %lu\n",
Hotfuncs, Clusters.size())
<< format("BOLT-INFO: Final average call distance = %.1lf "
"(%.0lf / %.0lf)\n",
TotalCalls ? TotalDistance / TotalCalls : 0, TotalDistance,
TotalCalls)
<< format("BOLT-INFO: Total Calls = %.0lf\n", TotalCalls);
if (TotalCalls)
outs() << format("BOLT-INFO: Total Calls within 64B = %.0lf (%.2lf%%)\n",
TotalCalls64B, 100 * TotalCalls64B / TotalCalls)
<< format("BOLT-INFO: Total Calls within 4KB = %.0lf (%.2lf%%)\n",
TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls)
<< format("BOLT-INFO: Total Calls within 2MB = %.0lf (%.2lf%%)\n",
TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls);
}
namespace {
std::vector<std::string> readFunctionOrderFile() {
std::vector<std::string> FunctionNames;
std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in);
if (!FuncsFile) {
errs() << "Ordered functions file \"" << opts::FunctionOrderFile
<< "\" can't be opened.\n";
exit(1);
}
std::string FuncName;
while (std::getline(FuncsFile, FuncName))
FunctionNames.push_back(FuncName);
return FunctionNames;
}
}
void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
auto &BFs = BC.getBinaryFunctions();
if (opts::ReorderFunctions != RT_NONE &&
opts::ReorderFunctions != RT_EXEC_COUNT &&
opts::ReorderFunctions != RT_USER) {
Cg = buildCallGraph(BC,
[](const BinaryFunction &BF) {
if (!BF.hasProfile())
return true;
if (BF.getState() != BinaryFunction::State::CFG)
return true;
return false;
},
opts::CgFromPerfData,
false, // IncludeColdCalls
opts::ReorderFunctionsUseHotSize,
opts::CgUseSplitHotSize,
opts::UseEdgeCounts,
opts::CgIgnoreRecursiveCalls);
Cg.normalizeArcWeights();
}
std::vector<Cluster> Clusters;
switch (opts::ReorderFunctions) {
case RT_NONE:
break;
case RT_EXEC_COUNT:
{
std::vector<BinaryFunction *> SortedFunctions(BFs.size());
uint32_t Index = 0;
std::transform(BFs.begin(),
BFs.end(),
SortedFunctions.begin(),
[](std::pair<const uint64_t, BinaryFunction> &BFI) {
return &BFI.second;
});
std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(),
[&](const BinaryFunction *A, const BinaryFunction *B) {
if (A->isIgnored())
return false;
const size_t PadA = opts::padFunction(*A);
const size_t PadB = opts::padFunction(*B);
if (!PadA || !PadB) {
if (PadA)
return true;
if (PadB)
return false;
}
return !A->hasProfile() &&
(B->hasProfile() ||
(A->getExecutionCount() > B->getExecutionCount()));
});
for (BinaryFunction *BF : SortedFunctions)
if (BF->hasProfile())
BF->setIndex(Index++);
}
break;
case RT_HFSORT:
Clusters = clusterize(Cg);
break;
case RT_HFSORT_PLUS:
Clusters = hfsortPlus(Cg);
break;
case RT_PETTIS_HANSEN:
Clusters = pettisAndHansen(Cg);
break;
case RT_RANDOM:
std::srand(opts::RandomSeed);
Clusters = randomClusters(Cg);
break;
case RT_USER:
{
uint32_t Index = 0;
for (const std::string &Function : readFunctionOrderFile()) {
std::vector<uint64_t> FuncAddrs;
BinaryData *BD = BC.getBinaryDataByName(Function);
if (!BD) {
uint32_t LocalID = 1;
while(1) {
// If we can't find the main symbol name, look for alternates.
const std::string FuncName =
Function + "/" + std::to_string(LocalID);
BD = BC.getBinaryDataByName(FuncName);
if (BD)
FuncAddrs.push_back(BD->getAddress());
else
break;
LocalID++;
}
} else {
FuncAddrs.push_back(BD->getAddress());
}
if (FuncAddrs.empty()) {
errs() << "BOLT-WARNING: Reorder functions: can't find function for "
<< Function << ".\n";
continue;
}
for (const uint64_t FuncAddr : FuncAddrs) {
const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr);
assert(FuncBD);
BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol());
if (!BF) {
errs() << "BOLT-WARNING: Reorder functions: can't find function for "
<< Function << ".\n";
break;
}
if (!BF->hasValidIndex())
BF->setIndex(Index++);
else if (opts::Verbosity > 0)
errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function
<< ".\n";
}
}
}
break;
}
reorder(std::move(Clusters), BFs);
std::unique_ptr<std::ofstream> FuncsFile;
if (!opts::GenerateFunctionOrderFile.empty()) {
FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile,
std::ios::out);
if (!FuncsFile) {
errs() << "BOLT-ERROR: ordered functions file "
<< opts::GenerateFunctionOrderFile << " cannot be opened\n";
exit(1);
}
}
std::unique_ptr<std::ofstream> LinkSectionsFile;
if (!opts::LinkSectionsFile.empty()) {
LinkSectionsFile =
std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out);
if (!LinkSectionsFile) {
errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile
<< " cannot be opened\n";
exit(1);
}
}
if (FuncsFile || LinkSectionsFile) {
std::vector<BinaryFunction *> SortedFunctions(BFs.size());
std::transform(BFs.begin(), BFs.end(), SortedFunctions.begin(),
[](std::pair<const uint64_t, BinaryFunction> &BFI) {
return &BFI.second;
});
// Sort functions by index.
std::stable_sort(
SortedFunctions.begin(),
SortedFunctions.end(),
[](const BinaryFunction *A, const BinaryFunction *B) {
if (A->hasValidIndex() && B->hasValidIndex())
return A->getIndex() < B->getIndex();
if (A->hasValidIndex() && !B->hasValidIndex())
return true;
if (!A->hasValidIndex() && B->hasValidIndex())
return false;
return A->getAddress() < B->getAddress();
});
for (const BinaryFunction *Func : SortedFunctions) {
if (!Func->hasValidIndex())
break;
if (Func->isPLTFunction())
continue;
if (FuncsFile)
*FuncsFile << Func->getOneName().str() << '\n';
if (LinkSectionsFile) {
const char *Indent = "";
std::vector<StringRef> AllNames = Func->getNames();
std::sort(AllNames.begin(), AllNames.end());
for (StringRef Name : AllNames) {
const size_t SlashPos = Name.find('/');
if (SlashPos != std::string::npos) {
// Avoid duplicates for local functions.
if (Name.find('/', SlashPos + 1) != std::string::npos)
continue;
Name = Name.substr(0, SlashPos);
}
*LinkSectionsFile << Indent << ".text." << Name.str() << '\n';
Indent = " ";
}
}
}
if (FuncsFile) {
FuncsFile->close();
outs() << "BOLT-INFO: dumped function order to "
<< opts::GenerateFunctionOrderFile << '\n';
}
if (LinkSectionsFile) {
LinkSectionsFile->close();
outs() << "BOLT-INFO: dumped linker section order to "
<< opts::LinkSectionsFile << '\n';
}
}
}
} // namespace bolt
} // namespace llvm