
This is an attempt to reland #151660 by including a missing STL header found by a buildbot failure. The stable function map could be huge for a large application. Fully loading it is slow and consumes a significant amount of memory, which is unnecessary and drastically slows down compilation, especially for non-LTO and distributed-ThinLTO setups. This patch introduces opt-in lazy-loading support for the stable function map. The detailed changes are: - `StableFunctionMap` - The map now stores entries in an `EntryStorage` struct, which includes offsets for serialized entries and a `std::once_flag` for thread-safe lazy loading. - The underlying map type is changed from `DenseMap` to `std::unordered_map` for compatibility with `std::once_flag`. - `contains()`, `size()` and `at()` are implemented to only load requested entries on demand. - Lazy Loading Mechanism - When reading indexed codegen data, if the newly introduced `-indexed-codegen-data-lazy-loading` flag is set, the stable function map is not fully deserialized up front. The binary format for the stable function map now includes offsets and sizes to support lazy loading. - The safety of lazy loading is guarded by the once flag per function hash. This guarantees that even in a multi-threaded environment, the deserialization for a given function hash will happen exactly once. The first thread to request it performs the load, and subsequent threads will wait for it to complete before using the data. For single-threaded builds, the overhead is negligible (a single check on the once flag). For multi-threaded scenarios, users can omit the flag to retain the previous eager-loading behavior.
293 lines
10 KiB
C++
293 lines
10 KiB
C++
//===-- CodeGenData.cpp ---------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file contains support for codegen data that has stable summary which
|
|
// can be used to optimize the code in the subsequent codegen.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Bitcode/BitcodeWriter.h"
|
|
#include "llvm/CGData/CodeGenDataReader.h"
|
|
#include "llvm/CGData/OutlinedHashTreeRecord.h"
|
|
#include "llvm/CGData/StableFunctionMapRecord.h"
|
|
#include "llvm/Object/ObjectFile.h"
|
|
#include "llvm/Support/Caching.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Support/WithColor.h"
|
|
|
|
#define DEBUG_TYPE "cg-data"
|
|
|
|
using namespace llvm;
|
|
using namespace cgdata;
|
|
|
|
// Command-line switches for codegen data. CodeGenData::getInstance() consults
// all three when it initializes the singleton.

/// Turns on emission of codegen data.
static cl::opt<bool>
    CodeGenDataGenerate("codegen-data-generate", cl::init(false), cl::Hidden,
                        cl::desc("Emit CodeGen Data into custom sections"));

/// Path of a previously produced .cgdata file to load; empty means "none".
static cl::opt<std::string>
    CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden,
                       cl::desc("File path to where .cgdata file is read"));

/// Two-round ThinLTO mode. Not file-local, so presumably referenced from other
/// translation units as well.
cl::opt<bool> CodeGenDataThinLTOTwoRounds(
    "codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden,
    cl::desc("Enable two-round ThinLTO code generation. The first round "
             "emits codegen data, while the second round uses the emitted "
             "codegen data for further optimizations."));
|
|
|
|
/// Build the human-readable text for \p Err.
///
/// \param Err    the codegen-data error code to describe.
/// \param ErrMsg optional detail; when non-empty it is appended after ": ".
/// \returns the fixed description for \p Err (empty for a code that is not
///          listed below), followed by the optional detail.
static std::string getCGDataErrString(cgdata_error Err,
                                      const std::string &ErrMsg = "") {
  // Pick the fixed description first; a code that matches no case leaves it
  // empty.
  const char *Description = "";
  switch (Err) {
  case cgdata_error::success:
    Description = "success";
    break;
  case cgdata_error::eof:
    Description = "end of File";
    break;
  case cgdata_error::bad_magic:
    Description = "invalid codegen data (bad magic)";
    break;
  case cgdata_error::bad_header:
    Description = "invalid codegen data (file header is corrupt)";
    break;
  case cgdata_error::empty_cgdata:
    Description = "empty codegen data";
    break;
  case cgdata_error::malformed:
    Description = "malformed codegen data";
    break;
  case cgdata_error::unsupported_version:
    Description = "unsupported codegen data version";
    break;
  }

  std::string Text(Description);

  // Tack the caller-supplied detail onto the description, if there is one.
  if (!ErrMsg.empty()) {
    Text += ": ";
    Text += ErrMsg;
  }

  return Text;
}
|
|
|
|
namespace {
|
|
|
|
// FIXME: This class is only here to support the transition to llvm::Error. It
|
|
// will be removed once this transition is complete. Clients should prefer to
|
|
// deal with the Error value directly, rather than converting to error_code.
|
|
class CGDataErrorCategoryType : public std::error_category {
|
|
const char *name() const noexcept override { return "llvm.cgdata"; }
|
|
|
|
std::string message(int IE) const override {
|
|
return getCGDataErrString(static_cast<cgdata_error>(IE));
|
|
}
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
const std::error_category &llvm::cgdata_category() {
|
|
static CGDataErrorCategoryType ErrorCategory;
|
|
return ErrorCategory;
|
|
}
|
|
|
|
std::string CGDataError::message() const {
|
|
return getCGDataErrString(Err, Msg);
|
|
}
|
|
|
|
char CGDataError::ID = 0;
|
|
|
|
// File-local section-name tables. Each table is filled by including
// CodeGenData.inc with CG_DATA_SECT_ENTRY defined to select one column of
// every entry; getCodeGenDataSectionName() indexes them by CGDataSectKind.

// Column 2: the "common" section name.
static const char *CodeGenDataSectNameCommon[] = {
#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix)         \
  SectNameCommon,
#include "llvm/CGData/CodeGenData.inc"
};

// Column 3: the COFF section name.
static const char *CodeGenDataSectNameCoff[] = {
#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix)         \
  SectNameCoff,
#include "llvm/CGData/CodeGenData.inc"
};

// Column 4: the prefix placed in front of the section name.
static const char *CodeGenDataSectNamePrefix[] = {
#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Prefix,
#include "llvm/CGData/CodeGenData.inc"
};
|
|
|
|
namespace llvm {
|
|
|
|
/// Compose the name of the codegen-data section of kind \p CGSK for object
/// format \p OF.
///
/// \param CGSK           index into the section-name tables.
/// \param OF             target object format.
/// \param AddSegmentInfo when true and \p OF is Mach-O, the per-kind prefix is
///                       placed in front of the section name.
/// \returns the (optionally prefixed) section name.
std::string getCodeGenDataSectionName(CGDataSectKind CGSK,
                                      Triple::ObjectFormatType OF,
                                      bool AddSegmentInfo) {
  // The prefix is used only for Mach-O, and only when the caller asks for it.
  const bool WantPrefix = OF == Triple::MachO && AddSegmentInfo;
  std::string Name = WantPrefix ? CodeGenDataSectNamePrefix[CGSK] : "";

  // COFF has its own table of names; every other format uses the common one.
  const char *BaseName = OF == Triple::COFF ? CodeGenDataSectNameCoff[CGSK]
                                            : CodeGenDataSectNameCommon[CGSK];
  Name += BaseName;
  return Name;
}
|
|
|
|
std::unique_ptr<CodeGenData> CodeGenData::Instance = nullptr;
|
|
std::once_flag CodeGenData::OnceFlag;
|
|
|
|
CodeGenData &CodeGenData::getInstance() {
|
|
std::call_once(CodeGenData::OnceFlag, []() {
|
|
Instance = std::unique_ptr<CodeGenData>(new CodeGenData());
|
|
|
|
if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds)
|
|
Instance->EmitCGData = true;
|
|
else if (!CodeGenDataUsePath.empty()) {
|
|
// Initialize the global CGData if the input file name is given.
|
|
// We do not error-out when failing to parse the input file.
|
|
// Instead, just emit an warning message and fall back as if no CGData
|
|
// were available.
|
|
auto FS = vfs::getRealFileSystem();
|
|
auto ReaderOrErr = CodeGenDataReader::create(CodeGenDataUsePath, *FS);
|
|
if (Error E = ReaderOrErr.takeError()) {
|
|
warn(std::move(E), CodeGenDataUsePath);
|
|
return;
|
|
}
|
|
// Publish each CGData based on the data type in the header.
|
|
auto Reader = ReaderOrErr->get();
|
|
if (Reader->hasOutlinedHashTree())
|
|
Instance->publishOutlinedHashTree(Reader->releaseOutlinedHashTree());
|
|
if (Reader->hasStableFunctionMap())
|
|
Instance->publishStableFunctionMap(Reader->releaseStableFunctionMap());
|
|
}
|
|
});
|
|
return *Instance;
|
|
}
|
|
|
|
namespace IndexedCGData {
|
|
|
|
/// Decode an indexed codegen-data header starting at \p Curr.
///
/// Fields are read as unaligned little-endian values in this order: Magic
/// (64-bit), Version (32-bit), DataKind (32-bit), OutlinedHashTreeOffset
/// (64-bit) and, for Version >= 2 only, StableFunctionMapOffset (64-bit).
///
/// \param Curr start of the serialized header. No length is passed in, so the
///             caller must guarantee the buffer is large enough.
/// \returns the decoded header, or a CGDataError with bad_magic when the magic
///          number does not match, or unsupported_version when the version is
///          newer than CurrentVersion.
Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
  using namespace support;

  static_assert(std::is_standard_layout_v<llvm::IndexedCGData::Header>,
                "The header should be standard layout type since we use offset "
                "of fields to read.");
  // Value-initialize so that a field this function does not assign (notably
  // StableFunctionMapOffset for a version-1 header) holds a well-defined value
  // instead of being left unassigned.
  Header H{};
  H.Magic = endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
  if (H.Magic != IndexedCGData::Magic)
    return make_error<CGDataError>(cgdata_error::bad_magic);
  H.Version = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
  if (H.Version > IndexedCGData::CGDataVersion::CurrentVersion)
    return make_error<CGDataError>(cgdata_error::unsupported_version);
  H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);

  static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version4,
                "Please update the offset computation below if a new field has "
                "been added to the header.");
  H.OutlinedHashTreeOffset =
      endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
  // The stable function map offset is only read from version 2 onwards.
  if (H.Version >= 2)
    H.StableFunctionMapOffset =
        endian::readNext<uint64_t, endianness::little, unaligned>(Curr);

  return H;
}
|
|
|
|
} // end namespace IndexedCGData
|
|
|
|
namespace cgdata {
|
|
|
|
/// Print a warning to the error stream.
///
/// \param Message text of the warning.
/// \param Whence  origin of the warning (e.g. a file name); when non-empty it
///                is printed before the message, followed by ": ".
/// \param Hint    when non-empty, printed afterwards as a separate note.
void warn(Twine Message, StringRef Whence, StringRef Hint) {
  // Emit the warning prefix first; the rest of the line goes to errs().
  WithColor::warning();
  raw_ostream &Err = errs();
  if (!Whence.empty())
    Err << Whence << ": ";
  Err << Message << "\n";
  if (!Hint.empty())
    WithColor::note() << Hint << "\n";
}
|
|
|
|
/// Report \p E as a warning and consume it.
///
/// A CGDataError is reported through its own message(). Any other error kind
/// (for instance a file-system error raised while opening the input) is
/// reported through its generic string form. Every llvm::Error must be handled
/// before it is destroyed, so no path may simply drop \p E.
///
/// \param E      the error to report; a success value is accepted and prints
///               nothing.
/// \param Whence origin of the error (e.g. a file name), printed before the
///               message when non-empty.
void warn(Error E, StringRef Whence) {
  // Testing a success value marks it as checked; there is nothing to report.
  if (!E)
    return;

  if (E.isA<CGDataError>()) {
    handleAllErrors(std::move(E), [&](const CGDataError &IPE) {
      warn(IPE.message(), Whence, "");
    });
    return;
  }

  // Not a CGDataError: still consume the error and surface its text, rather
  // than letting it be destroyed unhandled.
  warn(toString(std::move(E)), Whence, "");
}
|
|
|
|
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
|
|
AddStreamFn AddStream) {
|
|
LLVM_DEBUG(dbgs() << "Saving module: " << TheModule.getModuleIdentifier()
|
|
<< " in Task " << Task << "\n");
|
|
Expected<std::unique_ptr<CachedFileStream>> StreamOrErr =
|
|
AddStream(Task, TheModule.getModuleIdentifier());
|
|
if (Error Err = StreamOrErr.takeError())
|
|
report_fatal_error(std::move(Err));
|
|
std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr;
|
|
|
|
WriteBitcodeToFile(TheModule, *Stream->OS,
|
|
/*ShouldPreserveUseListOrder=*/true);
|
|
|
|
if (Error Err = Stream->commit())
|
|
report_fatal_error(std::move(Err));
|
|
}
|
|
|
|
/// Re-create a module from the in-memory bitcode stored for \p Task.
///
/// \param OrigModule supplies the module identifier given to the result.
/// \param Task       index into \p IRFiles of the bitcode to parse.
/// \param Context    context the parsed module is created in.
/// \param IRFiles    in-memory bitcode buffers, one per task.
/// \returns the parsed module, carrying \p OrigModule's identifier.
///
/// A bitcode parse failure is fatal.
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
                                               unsigned Task,
                                               LLVMContext &Context,
                                               ArrayRef<StringRef> IRFiles) {
  LLVM_DEBUG(dbgs() << "Loading module: " << OrigModule.getModuleIdentifier()
                    << " in Task " << Task << "\n");

  // Wrap the stored bytes in a non-owning buffer and parse them.
  auto IRBuffer = MemoryBuffer::getMemBuffer(
      IRFiles[Task], "in-memory IR file", /*RequiresNullTerminator=*/false);
  auto ModuleOrErr = parseBitcodeFile(*IRBuffer, Context);
  if (!ModuleOrErr)
    report_fatal_error(
        Twine("Failed to parse optimized bitcode loaded for Task: ") +
        Twine(Task) + "\n");

  // The parsed module is named after the in-memory buffer; put the original
  // module identifier back before handing it out.
  std::unique_ptr<Module> Restored = std::move(*ModuleOrErr);
  Restored->setModuleIdentifier(OrigModule.getModuleIdentifier());
  return Restored;
}
|
|
|
|
/// Merge the codegen data found in a set of in-memory object files and publish
/// the combined result.
///
/// \param ObjFiles contents of the object files; empty entries are skipped.
/// \returns the value of the combined hash that is passed by pointer to
///          CodeGenDataReader::mergeFromObjectFile for every file (it starts
///          at 0), or the first error hit while opening or merging a file.
Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {
  OutlinedHashTreeRecord MergedOutlineRecord;
  StableFunctionMapRecord MergedFunctionMapRecord;
  stable_hash CombinedHash = 0;

  for (StringRef ObjContents : ObjFiles) {
    if (ObjContents.empty())
      continue;

    // View the bytes as an object file without copying them.
    auto Buffer = MemoryBuffer::getMemBuffer(
        ObjContents, "in-memory object file", /*RequiresNullTerminator=*/false);
    auto ObjOrErr =
        object::ObjectFile::createObjectFile(Buffer->getMemBufferRef());
    if (!ObjOrErr)
      return ObjOrErr.takeError();

    // Fold this file's codegen data into the running records.
    object::ObjectFile *Obj = ObjOrErr->get();
    if (auto MergeErr = CodeGenDataReader::mergeFromObjectFile(
            Obj, MergedOutlineRecord, MergedFunctionMapRecord, &CombinedHash))
      return MergeErr;
  }

  MergedFunctionMapRecord.finalize();

  // Publish only the data kinds that ended up non-empty.
  if (!MergedOutlineRecord.empty())
    cgdata::publishOutlinedHashTree(std::move(MergedOutlineRecord.HashTree));
  if (!MergedFunctionMapRecord.empty())
    cgdata::publishStableFunctionMap(
        std::move(MergedFunctionMapRecord.FunctionMap));

  return CombinedHash;
}
|
|
|
|
} // end namespace cgdata
|
|
|
|
} // end namespace llvm
|