Zhaoxuan Jiang 2738828c0e
[Reland] [CGData] Lazy loading support for stable function map (#154491)
This is an attempt to reland #151660 by including a missing STL header
found by a buildbot failure.

The stable function map could be huge for a large application. Fully
loading it is slow and consumes a significant amount of memory, which is
unnecessary and drastically slows down compilation especially for
non-LTO and distributed-ThinLTO setups. This patch introduces opt-in
lazy loading support for the stable function map. The detailed changes
are:

- `StableFunctionMap`
- The map now stores entries in an `EntryStorage` struct, which includes
offsets for serialized entries and a `std::once_flag` for thread-safe
lazy loading.
- The underlying map type is changed from `DenseMap` to
`std::unordered_map` for compatibility with `std::once_flag`.
- `contains()`, `size()` and `at()` are implemented to only load
requested entries on demand.

- Lazy Loading Mechanism
- When reading indexed codegen data, if the newly-introduced
`-indexed-codegen-data-lazy-loading` flag is set, the stable function
map is not fully deserialized up front. The binary format for the stable
function map now includes offsets and sizes to support lazy loading.
- The safety of lazy loading is guarded by the once flag per function
hash. This guarantees that even in a multi-threaded environment, the
deserialization for a given function hash will happen exactly once. The
first thread to request it performs the load, and subsequent threads
will wait for it to complete before using the data. For single-threaded
builds, the overhead is negligible (a single check on the once flag).
For multi-threaded scenarios, users can omit the flag to retain the
previous eager-loading behavior.
2025-08-20 06:15:04 -07:00

386 lines
12 KiB
C++

//===-- llvm-cgdata.cpp - LLVM CodeGen Data Tool --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// llvm-cgdata parses raw codegen data embedded in compiled binary files, and
// merges them into a single .cgdata file. It can also inspect and manipulate
// a .cgdata file. This .cgdata can contain various codegen data like outlining
// information, and it can be used to optimize the code in the subsequent build.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringRef.h"
#include "llvm/CGData/CodeGenDataReader.h"
#include "llvm/CGData/CodeGenDataWriter.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/Binary.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/LLVMDriver.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::object;
/// Output serialization formats understood by the tool.
enum CGDataFormat {
  Invalid = 0, ///< Fallback for a format name that is not recognized.
  Text = 1,    ///< Selected by the format value "text".
  Binary = 2,  ///< Selected by the format value "binary".
};
/// Top-level actions the tool can perform; exactly one is selected per run.
enum CGDataAction {
  Convert = 0, ///< Dispatches to convert_main().
  Merge = 1,   ///< Dispatches to merge_main().
  Show = 2,    ///< Dispatches to show_main().
};
// Command-line option boilerplate.
//
// "Opts.inc" is included several times below, each time with a different macro
// defined so that a different part of it is expanded. The order of the
// #define / #include / #undef triples matters and must be kept as is.
// NOTE(review): Opts.inc is presumably generated at build time from the tool's
// option definitions — confirm against the build files.
namespace {
// Option identifiers: OPT_INVALID is 0, followed by one LLVM_MAKE_OPT_ID(...)
// expansion for every OPTION(...) record found in Opts.inc.
enum ID {
OPT_INVALID = 0, // This is not an option ID.
#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
#include "Opts.inc"
#undef OPTION
};
// Expand the string-table section of Opts.inc.
// NOTE(review): presumably this is what defines OptionStrTable used by
// CGDataOptTable below — confirm.
#define OPTTABLE_STR_TABLE_CODE
#include "Opts.inc"
#undef OPTTABLE_STR_TABLE_CODE
// Expand the prefixes-table section of Opts.inc.
// NOTE(review): presumably this is what defines OptionPrefixesTable used by
// CGDataOptTable below — confirm.
#define OPTTABLE_PREFIXES_TABLE_CODE
#include "Opts.inc"
#undef OPTTABLE_PREFIXES_TABLE_CODE
using namespace llvm::opt;
// One opt::OptTable::Info initializer per OPTION(...) record in Opts.inc.
static constexpr opt::OptTable::Info InfoTable[] = {
#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
#include "Opts.inc"
#undef OPTION
};
// Option table for this tool, constructed from the string table, the prefixes
// table and InfoTable declared above.
class CGDataOptTable : public opt::GenericOptTable {
public:
CGDataOptTable()
: GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {}
};
} // end anonymous namespace
// Options
//
// File-scope state filled in by parseArgs() and read by the *_main functions.

// argv[0] as passed to parseArgs(); used when printing help.
static StringRef ToolName;
// Output path; stays "-" unless the output option is given.
static std::string OutputFilename = "-";
// First entry of InputFilenames; the single input of the convert and show
// actions.
static std::string Filename;
// True when the cgdata-version option is present; makes show_main() print the
// reader's version.
static bool ShowCGDataVersion;
// True when the skip-trim option is present; forwarded to
// StableFunctionMapRecord::finalize() by merge_main().
static bool SkipTrim;
// The single action selected on the command line.
static CGDataAction Action;
// Requested output format. Left empty when no format option is given;
// parseArgs() then defaults it to Text for convert and Binary for merge.
static std::optional<CGDataFormat> OutputFormat;
// All positional input arguments.
static std::vector<std::string> InputFilenames;
// Defined outside this file; parseArgs() assigns it from the
// indexed-codegen-data-lazy-loading option.
extern cl::opt<bool> IndexedCodeGenDataLazyLoading;
/// Report a fatal diagnostic and terminate the process with exit status 1.
///
/// \param Message text of the diagnostic.
/// \param Whence  optional origin (callers pass file names); when non-empty it
///                is written before the message, followed by ": ".
/// \param Hint    optional extra text; when non-empty it is written through
///                WithColor::note() after the message.
static void exitWithError(Twine Message, StringRef Whence = "",
                          StringRef Hint = "") {
  // Called only for its effect on the error stream; the returned stream is
  // not used.
  WithColor::error();

  raw_ostream &Diag = errs();
  const bool HasWhence = !Whence.empty();
  if (HasWhence)
    Diag << Whence << ": ";
  Diag << Message << "\n";

  const bool HasHint = !Hint.empty();
  if (HasHint)
    WithColor::note() << Hint << "\n";

  ::exit(1);
}
/// Report the failure carried by \p E and terminate the process.
///
/// A CGDataError is reported using its own message(); any other error is
/// rendered with toString(). Both paths end in the Twine overload of
/// exitWithError, which exits with status 1.
///
/// \param E      the error to report; consumed by this call.
/// \param Whence optional origin printed before the message.
static void exitWithError(Error E, StringRef Whence = "") {
  if (!E.isA<CGDataError>()) {
    exitWithError(toString(std::move(E)), Whence);
    return;
  }

  handleAllErrors(std::move(E), [&](const CGDataError &CGErr) {
    exitWithError(CGErr.message(), Whence);
  });
}
/// Report the message of the error code \p EC and terminate the process.
///
/// \param EC     error code whose message() is reported.
/// \param Whence optional origin printed before the message.
static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
  const std::string Description = EC.message();
  exitWithError(Description, Whence);
}
static int convert_main(int argc, const char *argv[]) {
std::error_code EC;
raw_fd_ostream OS(OutputFilename, EC,
OutputFormat == CGDataFormat::Text
? sys::fs::OF_TextWithCRLF
: sys::fs::OF_None);
if (EC)
exitWithErrorCode(EC, OutputFilename);
auto FS = vfs::getRealFileSystem();
auto ReaderOrErr = CodeGenDataReader::create(Filename, *FS);
if (Error E = ReaderOrErr.takeError())
exitWithError(std::move(E), Filename);
CodeGenDataWriter Writer;
auto Reader = ReaderOrErr->get();
if (Reader->hasOutlinedHashTree()) {
OutlinedHashTreeRecord Record(Reader->releaseOutlinedHashTree());
Writer.addRecord(Record);
}
if (Reader->hasStableFunctionMap()) {
StableFunctionMapRecord Record(Reader->releaseStableFunctionMap());
Writer.addRecord(Record);
}
if (OutputFormat == CGDataFormat::Text) {
if (Error E = Writer.writeText(OS))
exitWithError(std::move(E));
} else {
if (Error E = Writer.write(OS))
exitWithError(std::move(E));
}
return 0;
}
// Declared ahead of its definition because handleArchive and handleBuffer call
// each other.
static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
                         OutlinedHashTreeRecord &GlobalOutlineRecord,
                         StableFunctionMapRecord &GlobalFunctionMapRecord);

/// Merge codegen data from every member of the archive \p Arch into the two
/// global records.
///
/// Each member is passed to handleBuffer() under the name
/// "<Filename>(<member name>)". A failure to obtain a member's buffer or name,
/// or an error reported by the child iteration, terminates the process.
///
/// \returns true only if handleBuffer() returned true for every member.
static bool handleArchive(StringRef Filename, Archive &Arch,
                          OutlinedHashTreeRecord &GlobalOutlineRecord,
                          StableFunctionMapRecord &GlobalFunctionMapRecord) {
  bool AllOk = true;
  Error IterErr = Error::success();

  for (const auto &Member : Arch.children(IterErr)) {
    auto MemberBuf = Member.getMemoryBufferRef();
    if (Error E = MemberBuf.takeError())
      exitWithError(std::move(E), Filename);

    auto MemberName = Member.getName();
    if (Error E = MemberName.takeError())
      exitWithError(std::move(E), Filename);

    const std::string Label =
        (Filename + "(" + MemberName.get() + ")").str();
    // Every member is processed even after an earlier one has failed.
    const bool MemberOk = handleBuffer(Label, MemberBuf.get(),
                                       GlobalOutlineRecord,
                                       GlobalFunctionMapRecord);
    AllOk = AllOk && MemberOk;
  }

  if (IterErr)
    exitWithError(std::move(IterErr), Filename);

  return AllOk;
}
/// Merge the codegen data found in \p Buffer into the two global records.
///
/// The buffer is parsed with object::createBinary(). An object file is merged
/// directly, an archive is delegated to handleArchive(), and anything else is
/// reported as unsupported. Parse and merge errors terminate the process.
///
/// \param Filename name used in diagnostics.
/// \returns false for an unsupported binary, the result of handleArchive()
///          for an archive, and true for an object file.
static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
                         OutlinedHashTreeRecord &GlobalOutlineRecord,
                         StableFunctionMapRecord &GlobalFunctionMapRecord) {
  Expected<std::unique_ptr<object::Binary>> ParsedBin =
      object::createBinary(Buffer);
  if (Error E = ParsedBin.takeError())
    exitWithError(std::move(E), Filename);

  auto *Bin = ParsedBin->get();

  if (auto *Obj = dyn_cast<ObjectFile>(Bin)) {
    if (Error E = CodeGenDataReader::mergeFromObjectFile(
            Obj, GlobalOutlineRecord, GlobalFunctionMapRecord))
      exitWithError(std::move(E), Filename);
    return true;
  }

  if (auto *Arch = dyn_cast<Archive>(Bin))
    return handleArchive(Filename, *Arch, GlobalOutlineRecord,
                         GlobalFunctionMapRecord);

  // TODO: Support for the MachO universal binary format.
  errs() << "Error: unsupported binary file: " << Filename << "\n";
  return false;
}
/// Load \p Filename through MemoryBuffer::getFileOrSTDIN() and merge its
/// codegen data into the two global records.
///
/// A failure to load the file terminates the process.
///
/// \returns the result of handleBuffer() on the loaded contents.
static bool handleFile(StringRef Filename,
                       OutlinedHashTreeRecord &GlobalOutlineRecord,
                       StableFunctionMapRecord &GlobalFunctionMapRecord) {
  auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
  if (std::error_code EC = FileOrErr.getError())
    exitWithErrorCode(EC, Filename);

  MemoryBuffer &Contents = **FileOrErr;
  return handleBuffer(Filename, Contents, GlobalOutlineRecord,
                      GlobalFunctionMapRecord);
}
static int merge_main(int argc, const char *argv[]) {
bool Result = true;
OutlinedHashTreeRecord GlobalOutlineRecord;
StableFunctionMapRecord GlobalFunctionMapRecord;
for (auto &Filename : InputFilenames)
Result &=
handleFile(Filename, GlobalOutlineRecord, GlobalFunctionMapRecord);
if (!Result)
exitWithError("failed to merge codegen data files.");
GlobalFunctionMapRecord.finalize(SkipTrim);
CodeGenDataWriter Writer;
if (!GlobalOutlineRecord.empty())
Writer.addRecord(GlobalOutlineRecord);
if (!GlobalFunctionMapRecord.empty())
Writer.addRecord(GlobalFunctionMapRecord);
std::error_code EC;
raw_fd_ostream OS(OutputFilename, EC,
OutputFormat == CGDataFormat::Text
? sys::fs::OF_TextWithCRLF
: sys::fs::OF_None);
if (EC)
exitWithErrorCode(EC, OutputFilename);
if (OutputFormat == CGDataFormat::Text) {
if (Error E = Writer.writeText(OS))
exitWithError(std::move(E));
} else {
if (Error E = Writer.write(OS))
exitWithError(std::move(E));
}
return 0;
}
/// Implements the show action: print summary statistics for the codegen data
/// read from the global `Filename` to the stream named by `OutputFilename`.
///
/// The version line is printed only when `ShowCGDataVersion` is set. The
/// outlined hash tree and stable function map sections are printed only when
/// the reader reports having them. Any failure to open the output or to read
/// the input terminates the process via exitWithError/exitWithErrorCode.
/// \p argc and \p argv are unused.
///
/// \returns 0 on success.
static int show_main(int argc, const char *argv[]) {
  std::error_code EC;
  // Pass the std::string itself, as convert_main and merge_main do, instead of
  // a raw character pointer whose length would have to be re-measured.
  raw_fd_ostream OS(OutputFilename, EC, sys::fs::OF_TextWithCRLF);
  if (EC)
    exitWithErrorCode(EC, OutputFilename);

  auto FS = vfs::getRealFileSystem();
  auto ReaderOrErr = CodeGenDataReader::create(Filename, *FS);
  if (Error E = ReaderOrErr.takeError())
    exitWithError(std::move(E), Filename);

  auto *Reader = ReaderOrErr->get();
  if (ShowCGDataVersion)
    OS << "Version: " << Reader->getVersion() << "\n";

  if (Reader->hasOutlinedHashTree()) {
    auto Tree = Reader->releaseOutlinedHashTree();
    OS << "Outlined hash tree:\n";
    OS << "  Total Node Count: " << Tree->size() << "\n";
    OS << "  Terminal Node Count: " << Tree->size(/*GetTerminalCountOnly=*/true)
       << "\n";
    OS << "  Depth: " << Tree->depth() << "\n";
  }

  if (Reader->hasStableFunctionMap()) {
    auto Map = Reader->releaseStableFunctionMap();
    OS << "Stable function map:\n";
    OS << "  Unique hash Count: " << Map->size() << "\n";
    OS << "  Total function Count: "
       << Map->size(StableFunctionMap::TotalFunctionCount) << "\n";
    OS << "  Mergeable function Count: "
       << Map->size(StableFunctionMap::MergeableFunctionCount) << "\n";
  }

  return 0;
}
static void parseArgs(int argc, char **argv) {
CGDataOptTable Tbl;
ToolName = argv[0];
llvm::BumpPtrAllocator A;
llvm::StringSaver Saver{A};
llvm::opt::InputArgList Args =
Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
llvm::errs() << Msg << '\n';
std::exit(1);
});
if (Args.hasArg(OPT_help)) {
Tbl.printHelp(
llvm::outs(),
"llvm-cgdata <action> [options] (<binary files>|<.cgdata file>)",
ToolName.str().c_str());
std::exit(0);
}
if (Args.hasArg(OPT_version)) {
cl::PrintVersionMessage();
std::exit(0);
}
ShowCGDataVersion = Args.hasArg(OPT_cgdata_version);
SkipTrim = Args.hasArg(OPT_skip_trim);
if (opt::Arg *A = Args.getLastArg(OPT_format)) {
StringRef OF = A->getValue();
OutputFormat = StringSwitch<CGDataFormat>(OF)
.Case("text", CGDataFormat::Text)
.Case("binary", CGDataFormat::Binary)
.Default(CGDataFormat::Invalid);
if (OutputFormat == CGDataFormat::Invalid)
exitWithError("unsupported format '" + OF + "'");
}
InputFilenames = Args.getAllArgValues(OPT_INPUT);
if (InputFilenames.empty())
exitWithError("No input file is specified.");
Filename = InputFilenames[0];
if (Args.hasArg(OPT_output)) {
OutputFilename = Args.getLastArgValue(OPT_output);
for (auto &Filename : InputFilenames)
if (Filename == OutputFilename)
exitWithError(
"Input file name cannot be the same as the output file name!\n");
}
opt::Arg *ActionArg = nullptr;
for (opt::Arg *Arg : Args.filtered(OPT_action_group)) {
if (ActionArg)
exitWithError("Only one action is allowed.");
ActionArg = Arg;
}
if (!ActionArg)
exitWithError("One action is required.");
switch (ActionArg->getOption().getID()) {
case OPT_show:
if (InputFilenames.size() != 1)
exitWithError("only one input file is allowed.");
Action = CGDataAction::Show;
break;
case OPT_convert:
// The default output format is text for convert.
if (!OutputFormat)
OutputFormat = CGDataFormat::Text;
if (InputFilenames.size() != 1)
exitWithError("only one input file is allowed.");
Action = CGDataAction::Convert;
break;
case OPT_merge:
// The default output format is binary for merge.
if (!OutputFormat)
OutputFormat = CGDataFormat::Binary;
Action = CGDataAction::Merge;
break;
default:
llvm_unreachable("unrecognized action");
}
IndexedCodeGenDataLazyLoading =
Args.hasArg(OPT_indexed_codegen_data_lazy_loading);
}
/// Tool entry point: parse the command line into the file-scope options, then
/// run the handler for the selected action and return its result.
///
/// \param argc         argument count.
/// \param argvNonConst argument vector; handed to parseArgs() unchanged and to
///                     the action handlers as `const char **`.
int llvm_cgdata_main(int argc, char **argvNonConst, const llvm::ToolContext &) {
  parseArgs(argc, argvNonConst);

  // The action handlers take a const-qualified argument vector.
  const char **argv = const_cast<const char **>(argvNonConst);

  switch (Action) {
  case CGDataAction::Show:
    return show_main(argc, argv);
  case CGDataAction::Merge:
    return merge_main(argc, argv);
  case CGDataAction::Convert:
    return convert_main(argc, argv);
  }
  llvm_unreachable("unrecognized action");
}